(Fsyntax_table_p, Fchar_syntax, Fmatching_paren, Fmodify_syntax_entry):
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 #include <config.h>
22 #include <ctype.h>
23 #include "lisp.h"
24 #include "commands.h"
25 #include "buffer.h"
26 #include "syntax.h"
27
28 Lisp_Object Qsyntax_table_p, Qsyntax_table;
29
30 static void scan_sexps_forward ();
31 static int char_quoted ();
32
33 int words_include_escapes;
34
35 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
36 if not compiled with GCC. No need to mark it, since it is used
37 only very temporarily. */
38 Lisp_Object syntax_temp;
39
40 /* This is the internal form of the parse state used in parse-partial-sexp. */
41
42 struct lisp_parse_state
43 {
44 int depth; /* Depth at end of parsing */
45 int instring; /* -1 if not within string, else desired terminator. */
46 int incomment; /* Nonzero if within a comment at end of parsing */
47 int comstyle; /* comment style a=0, or b=1 */
48 int quoted; /* Nonzero if just after an escape char at end of parsing */
49 int thislevelstart; /* Char number of most recent start-of-expression at current level */
50 int prevlevelstart; /* Char number of start of containing expression */
51 int location; /* Char number at which parsing stopped. */
52 int mindepth; /* Minimum depth seen while scanning. */
53 int comstart; /* Position just after last comment starter. */
54 };
55 \f
56 /* These variables are a cache for finding the start of a defun.
57 find_start_pos is the place for which the defun start was found.
58 find_start_value is the defun start position found for it.
59 find_start_buffer is the buffer it was found in.
60 find_start_begv is the BEGV value when it was found.
61 find_start_modiff is the value of MODIFF when it was found. */
62
63 static int find_start_pos;
64 static int find_start_value;
65 static struct buffer *find_start_buffer;
66 static int find_start_begv;
67 static int find_start_modiff;
68
69 /* Find a defun-start that is the last one before POS (or nearly the last).
70 We record what we find, so that another call in the same area
71 can return the same value right away. */
72
73 static int
74 find_defun_start (pos)
75 int pos;
76 {
77 int tem;
78 int shortage;
79
80 /* Use previous finding, if it's valid and applies to this inquiry. */
81 if (current_buffer == find_start_buffer
82 /* Reuse the defun-start even if POS is a little farther on.
83 POS might be in the next defun, but that's ok.
84 Our value may not be the best possible, but will still be usable. */
85 && pos <= find_start_pos + 1000
86 && pos >= find_start_value
87 && BEGV == find_start_begv
88 && MODIFF == find_start_modiff)
89 return find_start_value;
90
91 /* Back up to start of line. */
92 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
93
94 while (tem > BEGV)
95 {
96 /* Open-paren at start of line means we found our defun-start. */
97 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
98 break;
99 /* Move to beg of previous line. */
100 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
101 }
102
103 /* Record what we found, for the next try. */
104 find_start_value = tem;
105 find_start_buffer = current_buffer;
106 find_start_modiff = MODIFF;
107 find_start_begv = BEGV;
108 find_start_pos = pos;
109
110 return find_start_value;
111 }
112 \f
113 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
114 "Return t if OBJECT is a syntax table.\n\
115 Currently, any char-table counts as a syntax table.")
116 (object)
117 Lisp_Object object;
118 {
119 if (CHAR_TABLE_P (object)
120 && XCHAR_TABLE (object)->purpose == Qsyntax_table)
121 return Qt;
122 return Qnil;
123 }
124
125 static void
126 check_syntax_table (obj)
127 Lisp_Object obj;
128 {
129 if (!(CHAR_TABLE_P (obj)
130 && XCHAR_TABLE (obj)->purpose == Qsyntax_table))
131 wrong_type_argument (Qsyntax_table_p, obj);
132 }
133
134 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
135 "Return the current syntax table.\n\
136 This is the one specified by the current buffer.")
137 ()
138 {
139 return current_buffer->syntax_table;
140 }
141
142 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
143 Sstandard_syntax_table, 0, 0, 0,
144 "Return the standard syntax table.\n\
145 This is the one used for new buffers.")
146 ()
147 {
148 return Vstandard_syntax_table;
149 }
150
151 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
152 "Construct a new syntax table and return it.\n\
153 It is a copy of the TABLE, which defaults to the standard syntax table.")
154 (table)
155 Lisp_Object table;
156 {
157 Lisp_Object copy;
158
159 if (!NILP (table))
160 check_syntax_table (table);
161 else
162 table = Vstandard_syntax_table;
163
164 copy = Fcopy_sequence (table);
165 Fset_char_table_parent (copy, Vstandard_syntax_table);
166 return copy;
167 }
168
169 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
170 "Select a new syntax table for the current buffer.\n\
171 One argument, a syntax table.")
172 (table)
173 Lisp_Object table;
174 {
175 check_syntax_table (table);
176 current_buffer->syntax_table = table;
177 /* Indicate that this buffer now has a specified syntax table. */
178 current_buffer->local_var_flags
179 |= XFASTINT (buffer_local_flags.syntax_table);
180 return table;
181 }
182 \f
183 /* Convert a letter which signifies a syntax code
184 into the code it signifies.
185 This is used by modify-syntax-entry, and other things. */
186
187 unsigned char syntax_spec_code[0400] =
188 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
189 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 (char) Swhitespace, 0377, (char) Sstring, 0377,
193 (char) Smath, 0377, 0377, (char) Squote,
194 (char) Sopen, (char) Sclose, 0377, 0377,
195 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
196 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
197 0377, 0377, 0377, 0377,
198 (char) Scomment, 0377, (char) Sendcomment, 0377,
199 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
200 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
202 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
203 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
207 };
208
209 /* Indexed by syntax code, give the letter that describes it. */
210
211 char syntax_code_spec[14] =
212 {
213 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
214 };
215 \f
216 /* Look up the value for CHARACTER in syntax table TABLE's parent
217 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
218 for CHARACTER. It's actually used only when not compiled with GCC. */
219
220 Lisp_Object
221 syntax_parent_lookup (table, character)
222 Lisp_Object table;
223 int character;
224 {
225 Lisp_Object value;
226
227 while (1)
228 {
229 table = XCHAR_TABLE (table)->parent;
230 if (NILP (table))
231 return Qnil;
232
233 value = XCHAR_TABLE (table)->contents[character];
234 if (!NILP (value))
235 return value;
236 }
237 }
238
239 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
240 "Return the syntax code of CHARACTER, described by a character.\n\
241 For example, if CHARACTER is a word constituent,\n\
242 the character `w' is returned.\n\
243 The characters that correspond to various syntax codes\n\
244 are listed in the documentation of `modify-syntax-entry'.")
245 (character)
246 Lisp_Object character;
247 {
248 int char_int;
249 CHECK_NUMBER (character, 0);
250 char_int = XINT (character);
251 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
252 }
253
254 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
255 "Return the matching parenthesis of CHARACTER, or nil if none.")
256 (character)
257 Lisp_Object character;
258 {
259 int char_int, code;
260 CHECK_NUMBER (character, 0);
261 char_int = XINT (character);
262 code = SYNTAX (char_int);
263 if (code == Sopen || code == Sclose)
264 return make_number (SYNTAX_MATCH (char_int));
265 return Qnil;
266 }
267
268 /* This comment supplies the doc string for modify-syntax-entry,
269 for make-docfile to see. We cannot put this in the real DEFUN
270 due to limits in the Unix cpp.
271
272 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
273 "Set syntax for character CHAR according to string S.\n\
274 The syntax is changed only for table TABLE, which defaults to\n\
275 the current buffer's syntax table.\n\
276 The first character of S should be one of the following:\n\
277 Space or - whitespace syntax. w word constituent.\n\
278 _ symbol constituent. . punctuation.\n\
279 ( open-parenthesis. ) close-parenthesis.\n\
280 \" string quote. \\ escape.\n\
281 $ paired delimiter. ' expression quote or prefix operator.\n\
282 < comment starter. > comment ender.\n\
283 / character-quote. @ inherit from `standard-syntax-table'.\n\
284 \n\
285 Only single-character comment start and end sequences are represented thus.\n\
286 Two-character sequences are represented as described below.\n\
287 The second character of S is the matching parenthesis,\n\
288 used only if the first character is `(' or `)'.\n\
289 Any additional characters are flags.\n\
290 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
291 1 means CHAR is the start of a two-char comment start sequence.\n\
292 2 means CHAR is the second character of such a sequence.\n\
293 3 means CHAR is the start of a two-char comment end sequence.\n\
294 4 means CHAR is the second character of such a sequence.\n\
295 \n\
296 There can be up to two orthogonal comment sequences. This is to support\n\
297 language modes such as C++. By default, all comment sequences are of style\n\
298 a, but you can set the comment sequence style to b (on the second character\n\
299 of a comment-start, or the first character of a comment-end sequence) using\n\
300 this flag:\n\
301 b means CHAR is part of comment sequence b.\n\
302 \n\
303 p means CHAR is a prefix character for `backward-prefix-chars';\n\
304 such characters are treated as whitespace when they occur\n\
305 between expressions.")
306 (char, s, table)
307 */
308
309 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
310 /* I really don't know why this is interactive
311 help-form should at least be made useful whilst reading the second arg
312 */
313 "cSet syntax for character: \nsSet syntax for %s to: ",
314 0 /* See immediately above */)
315 (c, newentry, syntax_table)
316 Lisp_Object c, newentry, syntax_table;
317 {
318 register unsigned char *p;
319 register enum syntaxcode code;
320 int val;
321 Lisp_Object match;
322
323 CHECK_NUMBER (c, 0);
324 CHECK_STRING (newentry, 1);
325
326 if (NILP (syntax_table))
327 syntax_table = current_buffer->syntax_table;
328 else
329 check_syntax_table (syntax_table);
330
331 p = XSTRING (newentry)->data;
332 code = (enum syntaxcode) syntax_spec_code[*p++];
333 if (((int) code & 0377) == 0377)
334 error ("invalid syntax description letter: %c", c);
335
336 if (code == Sinherit)
337 {
338 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
339 return Qnil;
340 }
341
342 if (*p)
343 {
344 XSETINT (match, *p++);
345 if (XFASTINT (match) == ' ')
346 match = Qnil;
347 }
348 else
349 match = Qnil;
350
351 val = (int) code;
352 while (*p)
353 switch (*p++)
354 {
355 case '1':
356 val |= 1 << 16;
357 break;
358
359 case '2':
360 val |= 1 << 17;
361 break;
362
363 case '3':
364 val |= 1 << 18;
365 break;
366
367 case '4':
368 val |= 1 << 19;
369 break;
370
371 case 'p':
372 val |= 1 << 20;
373 break;
374
375 case 'b':
376 val |= 1 << 21;
377 break;
378 }
379
380 SET_RAW_SYNTAX_ENTRY (syntax_table, c,
381 Fcons (make_number (val), match));
382
383 return Qnil;
384 }
385 \f
386 /* Dump syntax table to buffer in human-readable format */
387
388 static void
389 describe_syntax (value)
390 Lisp_Object value;
391 {
392 register enum syntaxcode code;
393 char desc, match, start1, start2, end1, end2, prefix, comstyle;
394 char str[2];
395 Lisp_Object first, match_lisp;
396
397 Findent_to (make_number (16), make_number (1));
398
399 if (NILP (value))
400 {
401 insert_string ("inherit");
402 return;
403 }
404
405 if (!CONSP (value))
406 {
407 insert_string ("invalid");
408 return;
409 }
410
411 first = XCONS (value)->car;
412 match_lisp = XCONS (value)->cdr;
413
414 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
415 {
416 insert_string ("invalid");
417 return;
418 }
419
420 code = (enum syntaxcode) (first & 0377);
421 start1 = (XINT (first) >> 16) & 1;
422 start2 = (XINT (first) >> 17) & 1;
423 end1 = (XINT (first) >> 18) & 1;
424 end2 = (XINT (first) >> 19) & 1;
425 prefix = (XINT (first) >> 20) & 1;
426 comstyle = (XINT (first) >> 21) & 1;
427
428 if ((int) code < 0 || (int) code >= (int) Smax)
429 {
430 insert_string ("invalid");
431 return;
432 }
433 desc = syntax_code_spec[(int) code];
434
435 str[0] = desc, str[1] = 0;
436 insert (str, 1);
437
438 str[0] = !NILP (match_lisp) ? XINT (match_lisp) : ' ';
439 insert (str, 1);
440
441 if (start1)
442 insert ("1", 1);
443 if (start2)
444 insert ("2", 1);
445
446 if (end1)
447 insert ("3", 1);
448 if (end2)
449 insert ("4", 1);
450
451 if (prefix)
452 insert ("p", 1);
453 if (comstyle)
454 insert ("b", 1);
455
456 insert_string ("\twhich means: ");
457
458 switch (SWITCH_ENUM_CAST (code))
459 {
460 case Swhitespace:
461 insert_string ("whitespace"); break;
462 case Spunct:
463 insert_string ("punctuation"); break;
464 case Sword:
465 insert_string ("word"); break;
466 case Ssymbol:
467 insert_string ("symbol"); break;
468 case Sopen:
469 insert_string ("open"); break;
470 case Sclose:
471 insert_string ("close"); break;
472 case Squote:
473 insert_string ("quote"); break;
474 case Sstring:
475 insert_string ("string"); break;
476 case Smath:
477 insert_string ("math"); break;
478 case Sescape:
479 insert_string ("escape"); break;
480 case Scharquote:
481 insert_string ("charquote"); break;
482 case Scomment:
483 insert_string ("comment"); break;
484 case Sendcomment:
485 insert_string ("endcomment"); break;
486 default:
487 insert_string ("invalid");
488 return;
489 }
490
491 if (!NILP (match_lisp))
492 {
493 insert_string (", matches ");
494 insert_char (XINT (match_lisp));
495 }
496
497 if (start1)
498 insert_string (",\n\t is the first character of a comment-start sequence");
499 if (start2)
500 insert_string (",\n\t is the second character of a comment-start sequence");
501
502 if (end1)
503 insert_string (",\n\t is the first character of a comment-end sequence");
504 if (end2)
505 insert_string (",\n\t is the second character of a comment-end sequence");
506 if (comstyle)
507 insert_string (" (comment style b)");
508
509 if (prefix)
510 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
511
512 insert_string ("\n");
513 }
514
515 static Lisp_Object
516 describe_syntax_1 (vector)
517 Lisp_Object vector;
518 {
519 struct buffer *old = current_buffer;
520 set_buffer_internal (XBUFFER (Vstandard_output));
521 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
522 call0 (intern ("help-mode"));
523 set_buffer_internal (old);
524 return Qnil;
525 }
526
527 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
528 "Describe the syntax specifications in the syntax table.\n\
529 The descriptions are inserted in a buffer, which is then displayed.")
530 ()
531 {
532 internal_with_output_to_temp_buffer
533 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
534
535 return Qnil;
536 }
537 \f
538 /* Return the position across COUNT words from FROM.
539 If that many words cannot be found before the end of the buffer, return 0.
540 COUNT negative means scan backward and stop at word beginning. */
541
542 scan_words (from, count)
543 register int from, count;
544 {
545 register int beg = BEGV;
546 register int end = ZV;
547 register int code;
548 int charcode;
549
550 immediate_quit = 1;
551 QUIT;
552
553 while (count > 0)
554 {
555 while (1)
556 {
557 if (from == end)
558 {
559 immediate_quit = 0;
560 return 0;
561 }
562 charcode = FETCH_CHAR (from);
563 code = SYNTAX (charcode);
564 if (words_include_escapes
565 && (code == Sescape || code == Scharquote))
566 break;
567 if (code == Sword)
568 break;
569 from++;
570 }
571 while (1)
572 {
573 if (from == end) break;
574 charcode = FETCH_CHAR (from);
575 code = SYNTAX (charcode);
576 if (!(words_include_escapes
577 && (code == Sescape || code == Scharquote)))
578 if (code != Sword)
579 break;
580 from++;
581 }
582 count--;
583 }
584 while (count < 0)
585 {
586 while (1)
587 {
588 if (from == beg)
589 {
590 immediate_quit = 0;
591 return 0;
592 }
593 charcode = FETCH_CHAR (from - 1);
594 code = SYNTAX (charcode);
595 if (words_include_escapes
596 && (code == Sescape || code == Scharquote))
597 break;
598 if (code == Sword)
599 break;
600 from--;
601 }
602 while (1)
603 {
604 if (from == beg) break;
605 charcode = FETCH_CHAR (from - 1);
606 code = SYNTAX (charcode);
607 if (!(words_include_escapes
608 && (code == Sescape || code == Scharquote)))
609 if (code != Sword)
610 break;
611 from--;
612 }
613 count++;
614 }
615
616 immediate_quit = 0;
617
618 return from;
619 }
620
621 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
622 "Move point forward ARG words (backward if ARG is negative).\n\
623 Normally returns t.\n\
624 If an edge of the buffer is reached, point is left there\n\
625 and nil is returned.")
626 (count)
627 Lisp_Object count;
628 {
629 int val;
630 CHECK_NUMBER (count, 0);
631
632 if (!(val = scan_words (point, XINT (count))))
633 {
634 SET_PT (XINT (count) > 0 ? ZV : BEGV);
635 return Qnil;
636 }
637 SET_PT (val);
638 return Qt;
639 }
640 \f
641 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
642 "Move forward across up to N comments. If N is negative, move backward.\n\
643 Stop scanning if we find something other than a comment or whitespace.\n\
644 Set point to where scanning stops.\n\
645 If N comments are found as expected, with nothing except whitespace\n\
646 between them, return t; otherwise return nil.")
647 (count)
648 Lisp_Object count;
649 {
650 register int from;
651 register int stop;
652 register int c, c1;
653 register enum syntaxcode code;
654 int comstyle = 0; /* style of comment encountered */
655 int found;
656 int count1;
657
658 CHECK_NUMBER (count, 0);
659 count1 = XINT (count);
660
661 immediate_quit = 1;
662 QUIT;
663
664 from = PT;
665
666 while (count1 > 0)
667 {
668 stop = ZV;
669 do
670 {
671 if (from == stop)
672 {
673 SET_PT (from);
674 immediate_quit = 0;
675 return Qnil;
676 }
677 c = FETCH_CHAR (from);
678 code = SYNTAX (c);
679 from++;
680 comstyle = 0;
681 if (from < stop && SYNTAX_COMSTART_FIRST (c)
682 && (c1 = FETCH_CHAR (from),
683 SYNTAX_COMSTART_SECOND (c1)))
684 {
685 /* We have encountered a comment start sequence and we
686 are ignoring all text inside comments. We must record
687 the comment style this sequence begins so that later,
688 only a comment end of the same style actually ends
689 the comment section. */
690 code = Scomment;
691 comstyle = SYNTAX_COMMENT_STYLE (c1);
692 from++;
693 }
694 }
695 while (code == Swhitespace || code == Sendcomment);
696 if (code != Scomment)
697 {
698 immediate_quit = 0;
699 SET_PT (from - 1);
700 return Qnil;
701 }
702 /* We're at the start of a comment. */
703 while (1)
704 {
705 if (from == stop)
706 {
707 immediate_quit = 0;
708 SET_PT (from);
709 return Qnil;
710 }
711 c = FETCH_CHAR (from);
712 from++;
713 if (SYNTAX (c) == Sendcomment
714 && SYNTAX_COMMENT_STYLE (c) == comstyle)
715 /* we have encountered a comment end of the same style
716 as the comment sequence which began this comment
717 section */
718 break;
719 if (from < stop && SYNTAX_COMEND_FIRST (c)
720 && (c1 = FETCH_CHAR (from),
721 SYNTAX_COMEND_SECOND (c1))
722 && SYNTAX_COMMENT_STYLE (c) == comstyle)
723 /* we have encountered a comment end of the same style
724 as the comment sequence which began this comment
725 section */
726 { from++; break; }
727 }
728 /* We have skipped one comment. */
729 count1--;
730 }
731
732 while (count1 < 0)
733 {
734 stop = BEGV;
735 while (from > stop)
736 {
737 int quoted;
738
739 from--;
740 quoted = char_quoted (from);
741 if (quoted)
742 from--;
743 c = FETCH_CHAR (from);
744 code = SYNTAX (c);
745 comstyle = 0;
746 if (code == Sendcomment)
747 comstyle = SYNTAX_COMMENT_STYLE (c);
748 if (from > stop && SYNTAX_COMEND_SECOND (c)
749 && (c1 = FETCH_CHAR (from - 1),
750 SYNTAX_COMEND_FIRST (c1))
751 && !char_quoted (from - 1))
752 {
753 /* We must record the comment style encountered so that
754 later, we can match only the proper comment begin
755 sequence of the same style. */
756 code = Sendcomment;
757 comstyle = SYNTAX_COMMENT_STYLE (c1);
758 from--;
759 }
760
761 if (code == Sendcomment && !quoted)
762 {
763 #if 0
764 if (code != SYNTAX (c))
765 /* For a two-char comment ender, we can assume
766 it does end a comment. So scan back in a simple way. */
767 {
768 if (from != stop) from--;
769 while (1)
770 {
771 if ((c = FETCH_CHAR (from),
772 SYNTAX (c) == Scomment)
773 && SYNTAX_COMMENT_STYLE (c) == comstyle)
774 break;
775 if (from == stop)
776 {
777 immediate_quit = 0;
778 SET_PT (from);
779 return Qnil;
780 }
781 from--;
782 if (SYNTAX_COMSTART_SECOND (c)
783 && (c1 = FETCH_CHAR (from),
784 SYNTAX_COMSTART_FIRST (c1))
785 && SYNTAX_COMMENT_STYLE (c) == comstyle
786 && !char_quoted (from))
787 break;
788 }
789 break;
790 }
791 #endif /* 0 */
792
793 /* Look back, counting the parity of string-quotes,
794 and recording the comment-starters seen.
795 When we reach a safe place, assume that's not in a string;
796 then step the main scan to the earliest comment-starter seen
797 an even number of string quotes away from the safe place.
798
799 OFROM[I] is position of the earliest comment-starter seen
800 which is I+2X quotes from the comment-end.
801 PARITY is current parity of quotes from the comment end. */
802 {
803 int parity = 0;
804 char my_stringend = 0;
805 int string_lossage = 0;
806 int comment_end = from;
807 int comstart_pos = 0;
808 int comstart_parity = 0;
809 int scanstart = from - 1;
810
811 /* At beginning of range to scan, we're outside of strings;
812 that determines quote parity to the comment-end. */
813 while (from != stop)
814 {
815 /* Move back and examine a character. */
816 from--;
817
818 c = FETCH_CHAR (from);
819 code = SYNTAX (c);
820
821 /* If this char is the second of a 2-char comment sequence,
822 back up and give the pair the appropriate syntax. */
823 if (from > stop && SYNTAX_COMEND_SECOND (c)
824 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
825 {
826 code = Sendcomment;
827 from--;
828 c = FETCH_CHAR (from);
829 }
830
831 /* If this char starts a 2-char comment start sequence,
832 treat it like a 1-char comment starter. */
833 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
834 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
835 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
836 code = Scomment;
837
838 /* Ignore escaped characters. */
839 if (char_quoted (from))
840 continue;
841
842 /* Track parity of quotes. */
843 if (code == Sstring)
844 {
845 parity ^= 1;
846 if (my_stringend == 0)
847 my_stringend = c;
848 /* If we have two kinds of string delimiters.
849 There's no way to grok this scanning backwards. */
850 else if (my_stringend != c)
851 string_lossage = 1;
852 }
853
854 /* Record comment-starters according to that
855 quote-parity to the comment-end. */
856 if (code == Scomment)
857 {
858 comstart_parity = parity;
859 comstart_pos = from;
860 }
861
862 /* If we find another earlier comment-ender,
863 any comment-starts earlier than that don't count
864 (because they go with the earlier comment-ender). */
865 if (code == Sendcomment
866 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
867 break;
868
869 /* Assume a defun-start point is outside of strings. */
870 if (code == Sopen
871 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
872 break;
873 }
874
875 if (comstart_pos == 0)
876 from = comment_end;
877 /* If the earliest comment starter
878 is followed by uniform paired string quotes or none,
879 we know it can't be inside a string
880 since if it were then the comment ender would be inside one.
881 So it does start a comment. Skip back to it. */
882 else if (comstart_parity == 0 && !string_lossage)
883 from = comstart_pos;
884 else
885 {
886 /* We had two kinds of string delimiters mixed up
887 together. Decode this going forwards.
888 Scan fwd from the previous comment ender
889 to the one in question; this records where we
890 last passed a comment starter. */
891 struct lisp_parse_state state;
892 scan_sexps_forward (&state, find_defun_start (comment_end),
893 comment_end - 1, -10000, 0, Qnil, 0);
894 if (state.incomment)
895 from = state.comstart;
896 else
897 /* We can't grok this as a comment; scan it normally. */
898 from = comment_end;
899 }
900 }
901 /* We have skipped one comment. */
902 break;
903 }
904 else if ((code != Swhitespace && code != Scomment) || quoted)
905 {
906 immediate_quit = 0;
907 SET_PT (from + 1);
908 return Qnil;
909 }
910 }
911
912 count1++;
913 }
914
915 SET_PT (from);
916 immediate_quit = 0;
917 return Qt;
918 }
919 \f
920 int parse_sexp_ignore_comments;
921
922 Lisp_Object
923 scan_lists (from, count, depth, sexpflag)
924 register int from;
925 int count, depth, sexpflag;
926 {
927 Lisp_Object val;
928 register int stop;
929 register int c;
930 unsigned char stringterm;
931 int quoted;
932 int mathexit = 0;
933 register enum syntaxcode code;
934 int min_depth = depth; /* Err out if depth gets less than this. */
935 int comstyle = 0; /* style of comment encountered */
936
937 if (depth > 0) min_depth = 0;
938
939 immediate_quit = 1;
940 QUIT;
941
942 while (count > 0)
943 {
944 stop = ZV;
945 while (from < stop)
946 {
947 c = FETCH_CHAR (from);
948 code = SYNTAX (c);
949 from++;
950 if (from < stop && SYNTAX_COMSTART_FIRST (c)
951 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
952 && parse_sexp_ignore_comments)
953 {
954 /* we have encountered a comment start sequence and we
955 are ignoring all text inside comments. we must record
956 the comment style this sequence begins so that later,
957 only a comment end of the same style actually ends
958 the comment section */
959 code = Scomment;
960 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
961 from++;
962 }
963
964 if (SYNTAX_PREFIX (c))
965 continue;
966
967 switch (SWITCH_ENUM_CAST (code))
968 {
969 case Sescape:
970 case Scharquote:
971 if (from == stop) goto lose;
972 from++;
973 /* treat following character as a word constituent */
974 case Sword:
975 case Ssymbol:
976 if (depth || !sexpflag) break;
977 /* This word counts as a sexp; return at end of it. */
978 while (from < stop)
979 {
980 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
981 {
982 case Scharquote:
983 case Sescape:
984 from++;
985 if (from == stop) goto lose;
986 break;
987 case Sword:
988 case Ssymbol:
989 case Squote:
990 break;
991 default:
992 goto done;
993 }
994 from++;
995 }
996 goto done;
997
998 case Scomment:
999 if (!parse_sexp_ignore_comments) break;
1000 while (1)
1001 {
1002 if (from == stop)
1003 {
1004 if (depth == 0)
1005 goto done;
1006 goto lose;
1007 }
1008 c = FETCH_CHAR (from);
1009 if (SYNTAX (c) == Sendcomment
1010 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1011 /* we have encountered a comment end of the same style
1012 as the comment sequence which began this comment
1013 section */
1014 break;
1015 from++;
1016 if (from < stop && SYNTAX_COMEND_FIRST (c)
1017 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1018 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1019 /* we have encountered a comment end of the same style
1020 as the comment sequence which began this comment
1021 section */
1022 { from++; break; }
1023 }
1024 break;
1025
1026 case Smath:
1027 if (!sexpflag)
1028 break;
1029 if (from != stop && c == FETCH_CHAR (from))
1030 from++;
1031 if (mathexit)
1032 {
1033 mathexit = 0;
1034 goto close1;
1035 }
1036 mathexit = 1;
1037
1038 case Sopen:
1039 if (!++depth) goto done;
1040 break;
1041
1042 case Sclose:
1043 close1:
1044 if (!--depth) goto done;
1045 if (depth < min_depth)
1046 error ("Containing expression ends prematurely");
1047 break;
1048
1049 case Sstring:
1050 stringterm = FETCH_CHAR (from - 1);
1051 while (1)
1052 {
1053 if (from >= stop) goto lose;
1054 if (FETCH_CHAR (from) == stringterm) break;
1055 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1056 {
1057 case Scharquote:
1058 case Sescape:
1059 from++;
1060 }
1061 from++;
1062 }
1063 from++;
1064 if (!depth && sexpflag) goto done;
1065 break;
1066 }
1067 }
1068
1069 /* Reached end of buffer. Error if within object, return nil if between */
1070 if (depth) goto lose;
1071
1072 immediate_quit = 0;
1073 return Qnil;
1074
1075 /* End of object reached */
1076 done:
1077 count--;
1078 }
1079
1080 while (count < 0)
1081 {
1082 stop = BEGV;
1083 while (from > stop)
1084 {
1085 from--;
1086 if (quoted = char_quoted (from))
1087 from--;
1088 c = FETCH_CHAR (from);
1089 code = SYNTAX (c);
1090 comstyle = 0;
1091 if (code == Sendcomment)
1092 comstyle = SYNTAX_COMMENT_STYLE (c);
1093 if (from > stop && SYNTAX_COMEND_SECOND (c)
1094 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1095 && !char_quoted (from - 1)
1096 && parse_sexp_ignore_comments)
1097 {
1098 /* we must record the comment style encountered so that
1099 later, we can match only the proper comment begin
1100 sequence of the same style */
1101 code = Sendcomment;
1102 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1103 from--;
1104 }
1105
1106 if (SYNTAX_PREFIX (c))
1107 continue;
1108
1109 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1110 {
1111 case Sword:
1112 case Ssymbol:
1113 if (depth || !sexpflag) break;
1114 /* This word counts as a sexp; count object finished after passing it. */
1115 while (from > stop)
1116 {
1117 quoted = char_quoted (from - 1);
1118 if (quoted)
1119 from--;
1120 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1121 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1122 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
1123 goto done2;
1124 from--;
1125 }
1126 goto done2;
1127
1128 case Smath:
1129 if (!sexpflag)
1130 break;
1131 if (from != stop && c == FETCH_CHAR (from - 1))
1132 from--;
1133 if (mathexit)
1134 {
1135 mathexit = 0;
1136 goto open2;
1137 }
1138 mathexit = 1;
1139
1140 case Sclose:
1141 if (!++depth) goto done2;
1142 break;
1143
1144 case Sopen:
1145 open2:
1146 if (!--depth) goto done2;
1147 if (depth < min_depth)
1148 error ("Containing expression ends prematurely");
1149 break;
1150
1151 case Sendcomment:
1152 if (!parse_sexp_ignore_comments)
1153 break;
1154 #if 0
1155 if (code != SYNTAX (c))
1156 /* For a two-char comment ender, we can assume
1157 it does end a comment. So scan back in a simple way. */
1158 {
1159 if (from != stop) from--;
1160 while (1)
1161 {
1162 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1163 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1164 break;
1165 if (from == stop)
1166 {
1167 if (depth == 0)
1168 goto done2;
1169 goto lose;
1170 }
1171 from--;
1172 if (SYNTAX_COMSTART_SECOND (c)
1173 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1174 && SYNTAX_COMMENT_STYLE (c) == comstyle
1175 && !char_quoted (from))
1176 break;
1177 }
1178 break;
1179 }
1180 #endif /* 0 */
1181
1182 /* Look back, counting the parity of string-quotes,
1183 and recording the comment-starters seen.
1184 When we reach a safe place, assume that's not in a string;
1185 then step the main scan to the earliest comment-starter seen
1186 an even number of string quotes away from the safe place.
1187
1188 OFROM[I] is position of the earliest comment-starter seen
1189 which is I+2X quotes from the comment-end.
1190 PARITY is current parity of quotes from the comment end. */
1191 {
1192 int parity = 0;
1193 char my_stringend = 0;
1194 int string_lossage = 0;
1195 int comment_end = from;
1196 int comstart_pos = 0;
1197 int comstart_parity = 0;
1198 int scanstart = from - 1;
1199
1200 /* At beginning of range to scan, we're outside of strings;
1201 that determines quote parity to the comment-end. */
1202 while (from != stop)
1203 {
1204 /* Move back and examine a character. */
1205 from--;
1206
1207 c = FETCH_CHAR (from);
1208 code = SYNTAX (c);
1209
1210 /* If this char is the second of a 2-char comment sequence,
1211 back up and give the pair the appropriate syntax. */
1212 if (from > stop && SYNTAX_COMEND_SECOND (c)
1213 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
1214 {
1215 code = Sendcomment;
1216 from--;
1217 c = FETCH_CHAR (from);
1218 }
1219
1220 /* If this char starts a 2-char comment start sequence,
1221 treat it like a 1-char comment starter. */
1222 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1223 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
1224 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
1225 code = Scomment;
1226
1227 /* Ignore escaped characters. */
1228 if (char_quoted (from))
1229 continue;
1230
1231 /* Track parity of quotes. */
1232 if (code == Sstring)
1233 {
1234 parity ^= 1;
1235 if (my_stringend == 0)
1236 my_stringend = c;
1237 /* If we have two kinds of string delimiters.
1238 There's no way to grok this scanning backwards. */
1239 else if (my_stringend != c)
1240 string_lossage = 1;
1241 }
1242
1243 /* Record comment-starters according to that
1244 quote-parity to the comment-end. */
1245 if (code == Scomment)
1246 {
1247 comstart_parity = parity;
1248 comstart_pos = from;
1249 }
1250
1251 /* If we find another earlier comment-ender,
1252 any comment-starts earlier than that don't count
1253 (because they go with the earlier comment-ender). */
1254 if (code == Sendcomment
1255 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1256 break;
1257
1258 /* Assume a defun-start point is outside of strings. */
1259 if (code == Sopen
1260 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1261 break;
1262 }
1263
1264 if (comstart_pos == 0)
1265 from = comment_end;
1266 /* If the earliest comment starter
1267 is followed by uniform paired string quotes or none,
1268 we know it can't be inside a string
1269 since if it were then the comment ender would be inside one.
1270 So it does start a comment. Skip back to it. */
1271 else if (comstart_parity == 0 && !string_lossage)
1272 from = comstart_pos;
1273 else
1274 {
1275 /* We had two kinds of string delimiters mixed up
1276 together. Decode this going forwards.
1277 Scan fwd from the previous comment ender
1278 to the one in question; this records where we
1279 last passed a comment starter. */
1280 struct lisp_parse_state state;
1281 scan_sexps_forward (&state, find_defun_start (comment_end),
1282 comment_end - 1, -10000, 0, Qnil, 0);
1283 if (state.incomment)
1284 from = state.comstart;
1285 else
1286 /* We can't grok this as a comment; scan it normally. */
1287 from = comment_end;
1288 }
1289 }
1290 break;
1291
1292 case Sstring:
1293 stringterm = FETCH_CHAR (from);
1294 while (1)
1295 {
1296 if (from == stop) goto lose;
1297 if (!char_quoted (from - 1)
1298 && stringterm == FETCH_CHAR (from - 1))
1299 break;
1300 from--;
1301 }
1302 from--;
1303 if (!depth && sexpflag) goto done2;
1304 break;
1305 }
1306 }
1307
1308 /* Reached start of buffer. Error if within object, return nil if between */
1309 if (depth) goto lose;
1310
1311 immediate_quit = 0;
1312 return Qnil;
1313
1314 done2:
1315 count++;
1316 }
1317
1318
1319 immediate_quit = 0;
1320 XSETFASTINT (val, from);
1321 return val;
1322
1323 lose:
1324 error ("Unbalanced parentheses");
1325 /* NOTREACHED */
1326 }
1327
1328 static int
1329 char_quoted (pos)
1330 register int pos;
1331 {
1332 register enum syntaxcode code;
1333 register int beg = BEGV;
1334 register int quoted = 0;
1335
1336 while (pos > beg
1337 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1338 || code == Sescape))
1339 pos--, quoted = !quoted;
1340 return quoted;
1341 }
1342
1343 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1344 "Scan from character number FROM by COUNT lists.\n\
1345 Returns the character number of the position thus found.\n\
1346 \n\
1347 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1348 only places where the depth in parentheses becomes zero\n\
1349 are candidates for stopping; COUNT such places are counted.\n\
1350 Thus, a positive value for DEPTH means go out levels.\n\
1351 \n\
1352 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1353 \n\
1354 If the beginning or end of (the accessible part of) the buffer is reached\n\
1355 and the depth is wrong, an error is signaled.\n\
1356 If the depth is right but the count is not used up, nil is returned.")
1357 (from, count, depth)
1358 Lisp_Object from, count, depth;
1359 {
1360 CHECK_NUMBER (from, 0);
1361 CHECK_NUMBER (count, 1);
1362 CHECK_NUMBER (depth, 2);
1363
1364 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1365 }
1366
1367 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1368 "Scan from character number FROM by COUNT balanced expressions.\n\
1369 If COUNT is negative, scan backwards.\n\
1370 Returns the character number of the position thus found.\n\
1371 \n\
1372 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1373 \n\
1374 If the beginning or end of (the accessible part of) the buffer is reached\n\
1375 in the middle of a parenthetical grouping, an error is signaled.\n\
1376 If the beginning or end is reached between groupings\n\
1377 but before count is used up, nil is returned.")
1378 (from, count)
1379 Lisp_Object from, count;
1380 {
1381 CHECK_NUMBER (from, 0);
1382 CHECK_NUMBER (count, 1);
1383
1384 return scan_lists (XINT (from), XINT (count), 0, 1);
1385 }
1386
1387 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1388 0, 0, 0,
1389 "Move point backward over any number of chars with prefix syntax.\n\
1390 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1391 ()
1392 {
1393 int beg = BEGV;
1394 int pos = point;
1395
1396 while (pos > beg && !char_quoted (pos - 1)
1397 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1398 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1399 pos--;
1400
1401 SET_PT (pos);
1402
1403 return Qnil;
1404 }
1405 \f
1406 /* Parse forward from FROM to END,
1407 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1408 and return a description of the state of the parse at END.
1409 If STOPBEFORE is nonzero, stop at the start of an atom.
1410 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1411
1412 static void
1413 scan_sexps_forward (stateptr, from, end, targetdepth,
1414 stopbefore, oldstate, commentstop)
1415 struct lisp_parse_state *stateptr;
1416 register int from;
1417 int end, targetdepth, stopbefore;
1418 Lisp_Object oldstate;
1419 int commentstop;
1420 {
1421 struct lisp_parse_state state;
1422
1423 register enum syntaxcode code;
1424 struct level { int last, prev; };
1425 struct level levelstart[100];
1426 register struct level *curlevel = levelstart;
1427 struct level *endlevel = levelstart + 100;
1428 char prev;
1429 register int depth; /* Paren depth of current scanning location.
1430 level - levelstart equals this except
1431 when the depth becomes negative. */
1432 int mindepth; /* Lowest DEPTH value seen. */
1433 int start_quoted = 0; /* Nonzero means starting after a char quote */
1434 Lisp_Object tem;
1435
1436 immediate_quit = 1;
1437 QUIT;
1438
1439 if (NILP (oldstate))
1440 {
1441 depth = 0;
1442 state.instring = -1;
1443 state.incomment = 0;
1444 state.comstyle = 0; /* comment style a by default */
1445 }
1446 else
1447 {
1448 tem = Fcar (oldstate);
1449 if (!NILP (tem))
1450 depth = XINT (tem);
1451 else
1452 depth = 0;
1453
1454 oldstate = Fcdr (oldstate);
1455 oldstate = Fcdr (oldstate);
1456 oldstate = Fcdr (oldstate);
1457 tem = Fcar (oldstate);
1458 state.instring = !NILP (tem) ? XINT (tem) : -1;
1459
1460 oldstate = Fcdr (oldstate);
1461 tem = Fcar (oldstate);
1462 state.incomment = !NILP (tem);
1463
1464 oldstate = Fcdr (oldstate);
1465 tem = Fcar (oldstate);
1466 start_quoted = !NILP (tem);
1467
1468 /* if the eight element of the list is nil, we are in comment
1469 style a. if it is non-nil, we are in comment style b */
1470 oldstate = Fcdr (oldstate);
1471 oldstate = Fcdr (oldstate);
1472 tem = Fcar (oldstate);
1473 state.comstyle = !NILP (tem);
1474 }
1475 state.quoted = 0;
1476 mindepth = depth;
1477
1478 curlevel->prev = -1;
1479 curlevel->last = -1;
1480
1481 /* Enter the loop at a place appropriate for initial state. */
1482
1483 if (state.incomment) goto startincomment;
1484 if (state.instring >= 0)
1485 {
1486 if (start_quoted) goto startquotedinstring;
1487 goto startinstring;
1488 }
1489 if (start_quoted) goto startquoted;
1490
1491 while (from < end)
1492 {
1493 code = SYNTAX (FETCH_CHAR (from));
1494 from++;
1495 if (code == Scomment)
1496 state.comstart = from-1;
1497
1498 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1499 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
1500 {
1501 /* Record the comment style we have entered so that only
1502 the comment-end sequence of the same style actually
1503 terminates the comment section. */
1504 code = Scomment;
1505 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1506 state.comstart = from-1;
1507 from++;
1508 }
1509
1510 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1511 continue;
1512 switch (SWITCH_ENUM_CAST (code))
1513 {
1514 case Sescape:
1515 case Scharquote:
1516 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1517 curlevel->last = from - 1;
1518 startquoted:
1519 if (from == end) goto endquoted;
1520 from++;
1521 goto symstarted;
1522 /* treat following character as a word constituent */
1523 case Sword:
1524 case Ssymbol:
1525 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1526 curlevel->last = from - 1;
1527 symstarted:
1528 while (from < end)
1529 {
1530 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1531 {
1532 case Scharquote:
1533 case Sescape:
1534 from++;
1535 if (from == end) goto endquoted;
1536 break;
1537 case Sword:
1538 case Ssymbol:
1539 case Squote:
1540 break;
1541 default:
1542 goto symdone;
1543 }
1544 from++;
1545 }
1546 symdone:
1547 curlevel->prev = curlevel->last;
1548 break;
1549
1550 startincomment:
1551 if (commentstop)
1552 goto done;
1553 if (from != BEGV)
1554 {
1555 /* Enter the loop in the middle so that we find
1556 a 2-char comment ender if we start in the middle of it. */
1557 prev = FETCH_CHAR (from - 1);
1558 goto startincomment_1;
1559 }
1560 /* At beginning of buffer, enter the loop the ordinary way. */
1561
1562 case Scomment:
1563 state.incomment = 1;
1564 if (commentstop)
1565 goto done;
1566 while (1)
1567 {
1568 if (from == end) goto done;
1569 prev = FETCH_CHAR (from);
1570 if (SYNTAX (prev) == Sendcomment
1571 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1572 /* Only terminate the comment section if the endcomment
1573 of the same style as the start sequence has been
1574 encountered. */
1575 break;
1576 from++;
1577 startincomment_1:
1578 if (from < end && SYNTAX_COMEND_FIRST (prev)
1579 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1580 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1581 /* Only terminate the comment section if the end-comment
1582 sequence of the same style as the start sequence has
1583 been encountered. */
1584 { from++; break; }
1585 }
1586 state.incomment = 0;
1587 state.comstyle = 0; /* reset the comment style */
1588 break;
1589
1590 case Sopen:
1591 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1592 depth++;
1593 /* curlevel++->last ran into compiler bug on Apollo */
1594 curlevel->last = from - 1;
1595 if (++curlevel == endlevel)
1596 error ("Nesting too deep for parser");
1597 curlevel->prev = -1;
1598 curlevel->last = -1;
1599 if (targetdepth == depth) goto done;
1600 break;
1601
1602 case Sclose:
1603 depth--;
1604 if (depth < mindepth)
1605 mindepth = depth;
1606 if (curlevel != levelstart)
1607 curlevel--;
1608 curlevel->prev = curlevel->last;
1609 if (targetdepth == depth) goto done;
1610 break;
1611
1612 case Sstring:
1613 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1614 curlevel->last = from - 1;
1615 state.instring = FETCH_CHAR (from - 1);
1616 startinstring:
1617 while (1)
1618 {
1619 if (from >= end) goto done;
1620 if (FETCH_CHAR (from) == state.instring) break;
1621 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1622 {
1623 case Scharquote:
1624 case Sescape:
1625 from++;
1626 startquotedinstring:
1627 if (from >= end) goto endquoted;
1628 }
1629 from++;
1630 }
1631 state.instring = -1;
1632 curlevel->prev = curlevel->last;
1633 from++;
1634 break;
1635
1636 case Smath:
1637 break;
1638 }
1639 }
1640 goto done;
1641
1642 stop: /* Here if stopping before start of sexp. */
1643 from--; /* We have just fetched the char that starts it; */
1644 goto done; /* but return the position before it. */
1645
1646 endquoted:
1647 state.quoted = 1;
1648 done:
1649 state.depth = depth;
1650 state.mindepth = mindepth;
1651 state.thislevelstart = curlevel->prev;
1652 state.prevlevelstart
1653 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1654 state.location = from;
1655 immediate_quit = 0;
1656
1657 *stateptr = state;
1658 }
1659
1660 /* This comment supplies the doc string for parse-partial-sexp,
1661 for make-docfile to see. We cannot put this in the real DEFUN
1662 due to limits in the Unix cpp.
1663
1664 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1665 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1666 Parsing stops at TO or when certain criteria are met;\n\
1667 point is set to where parsing stops.\n\
1668 If fifth arg STATE is omitted or nil,\n\
1669 parsing assumes that FROM is the beginning of a function.\n\
1670 Value is a list of eight elements describing final state of parsing:\n\
1671 0. depth in parens.\n\
1672 1. character address of start of innermost containing list; nil if none.\n\
1673 2. character address of start of last complete sexp terminated.\n\
1674 3. non-nil if inside a string.\n\
1675 (it is the character that will terminate the string.)\n\
1676 4. t if inside a comment.\n\
1677 5. t if following a quote character.\n\
1678 6. the minimum paren-depth encountered during this scan.\n\
1679 7. t if in a comment of style `b'.\n\
1680 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1681 in parentheses becomes equal to TARGETDEPTH.\n\
1682 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1683 any character that starts a sexp.\n\
1684 Fifth arg STATE is an eight-list like what this function returns.\n\
1685 It is used to initialize the state of the parse. Its second and third
1686 elements are ignored.
1687 Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1688 (from, to, targetdepth, stopbefore, state, commentstop)
1689 */
1690
1691 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
1692 0 /* See immediately above */)
1693 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1694 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
1695 {
1696 struct lisp_parse_state state;
1697 int target;
1698
1699 if (!NILP (targetdepth))
1700 {
1701 CHECK_NUMBER (targetdepth, 3);
1702 target = XINT (targetdepth);
1703 }
1704 else
1705 target = -100000; /* We won't reach this depth */
1706
1707 validate_region (&from, &to);
1708 scan_sexps_forward (&state, XINT (from), XINT (to),
1709 target, !NILP (stopbefore), oldstate,
1710 !NILP (commentstop));
1711
1712 SET_PT (state.location);
1713
1714 return Fcons (make_number (state.depth),
1715 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1716 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1717 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1718 Fcons (state.incomment ? Qt : Qnil,
1719 Fcons (state.quoted ? Qt : Qnil,
1720 Fcons (make_number (state.mindepth),
1721 Fcons (state.comstyle ? Qt : Qnil,
1722 Qnil))))))));
1723 }
1724 \f
1725 init_syntax_once ()
1726 {
1727 register int i;
1728 Lisp_Object temp;
1729
1730 /* This has to be done here, before we call Fmake_char_table. */
1731 Qsyntax_table = intern ("syntax-table");
1732 staticpro (&Qsyntax_table);
1733
1734 /* Intern this now in case it isn't already done.
1735 Setting this variable twice is harmless.
1736 But don't staticpro it here--that is done in alloc.c. */
1737 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1738
1739 /* Now we are ready to set up this property, so we can
1740 create syntax tables. */
1741 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
1742
1743 temp = Fcons (make_number ((int) Swhitespace), Qnil);
1744
1745 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
1746
1747 temp = Fcons (make_number ((int) Sword), Qnil);
1748 for (i = 'a'; i <= 'z'; i++)
1749 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1750 for (i = 'A'; i <= 'Z'; i++)
1751 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1752 for (i = '0'; i <= '9'; i++)
1753 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1754
1755 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
1756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
1757
1758 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
1759 Fcons (make_number (Sopen), make_number (')')));
1760 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
1761 Fcons (make_number (Sclose), make_number ('(')));
1762 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
1763 Fcons (make_number (Sopen), make_number (']')));
1764 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
1765 Fcons (make_number (Sclose), make_number ('[')));
1766 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
1767 Fcons (make_number (Sopen), make_number ('}')));
1768 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
1769 Fcons (make_number (Sclose), make_number ('{')));
1770 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
1771 Fcons (make_number ((int) Sstring), Qnil));
1772 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
1773 Fcons (make_number ((int) Sescape), Qnil));
1774
1775 temp = Fcons (make_number ((int) Ssymbol), Qnil);
1776 for (i = 0; i < 10; i++)
1777 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, "_-+*/&|<>="[i], temp);
1778
1779 temp = Fcons (make_number ((int) Spunct), Qnil);
1780 for (i = 0; i < 12; i++)
1781 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ".,;:?!#@~^'`"[i], temp);
1782 }
1783
1784 syms_of_syntax ()
1785 {
1786 Qsyntax_table_p = intern ("syntax-table-p");
1787 staticpro (&Qsyntax_table_p);
1788
1789 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1790 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1791
1792 words_include_escapes = 0;
1793 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1794 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1795
1796 defsubr (&Ssyntax_table_p);
1797 defsubr (&Ssyntax_table);
1798 defsubr (&Sstandard_syntax_table);
1799 defsubr (&Scopy_syntax_table);
1800 defsubr (&Sset_syntax_table);
1801 defsubr (&Schar_syntax);
1802 defsubr (&Smatching_paren);
1803 defsubr (&Smodify_syntax_entry);
1804 defsubr (&Sdescribe_syntax);
1805
1806 defsubr (&Sforward_word);
1807
1808 defsubr (&Sforward_comment);
1809 defsubr (&Sscan_lists);
1810 defsubr (&Sscan_sexps);
1811 defsubr (&Sbackward_prefix_chars);
1812 defsubr (&Sparse_partial_sexp);
1813 }