(Qsyntax_table): New variable.
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 #include <config.h>
22 #include <ctype.h>
23 #include "lisp.h"
24 #include "commands.h"
25 #include "buffer.h"
26 #include "syntax.h"
27
28 Lisp_Object Qsyntax_table_p, Qsyntax_table;
29
30 static void scan_sexps_forward ();
31 static int char_quoted ();
32
33 int words_include_escapes;
34
35 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
36 if not compiled with GCC. No need to mark it, since it is used
37 only very temporarily. */
38 Lisp_Object syntax_temp;
39
40 /* This is the internal form of the parse state used in parse-partial-sexp. */
41
42 struct lisp_parse_state
43 {
44 int depth; /* Depth at end of parsing */
45 int instring; /* -1 if not within string, else desired terminator. */
46 int incomment; /* Nonzero if within a comment at end of parsing */
47 int comstyle; /* comment style a=0, or b=1 */
48 int quoted; /* Nonzero if just after an escape char at end of parsing */
49 int thislevelstart; /* Char number of most recent start-of-expression at current level */
50 int prevlevelstart; /* Char number of start of containing expression */
51 int location; /* Char number at which parsing stopped. */
52 int mindepth; /* Minimum depth seen while scanning. */
53 int comstart; /* Position just after last comment starter. */
54 };
55 \f
56 /* These variables are a cache for finding the start of a defun.
57 find_start_pos is the place for which the defun start was found.
58 find_start_value is the defun start position found for it.
59 find_start_buffer is the buffer it was found in.
60 find_start_begv is the BEGV value when it was found.
61 find_start_modiff is the value of MODIFF when it was found. */
62
63 static int find_start_pos;
64 static int find_start_value;
65 static struct buffer *find_start_buffer;
66 static int find_start_begv;
67 static int find_start_modiff;
68
69 /* Find a defun-start that is the last one before POS (or nearly the last).
70 We record what we find, so that another call in the same area
71 can return the same value right away. */
72
73 static int
74 find_defun_start (pos)
75 int pos;
76 {
77 int tem;
78 int shortage;
79
80 /* Use previous finding, if it's valid and applies to this inquiry. */
81 if (current_buffer == find_start_buffer
82 /* Reuse the defun-start even if POS is a little farther on.
83 POS might be in the next defun, but that's ok.
84 Our value may not be the best possible, but will still be usable. */
85 && pos <= find_start_pos + 1000
86 && pos >= find_start_value
87 && BEGV == find_start_begv
88 && MODIFF == find_start_modiff)
89 return find_start_value;
90
91 /* Back up to start of line. */
92 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
93
94 while (tem > BEGV)
95 {
96 /* Open-paren at start of line means we found our defun-start. */
97 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
98 break;
99 /* Move to beg of previous line. */
100 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
101 }
102
103 /* Record what we found, for the next try. */
104 find_start_value = tem;
105 find_start_buffer = current_buffer;
106 find_start_modiff = MODIFF;
107 find_start_begv = BEGV;
108 find_start_pos = pos;
109
110 return find_start_value;
111 }
112 \f
113 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
114 "Return t if ARG is a syntax table.\n\
115 Currently, any char-table counts as a syntax table.")
116 (obj)
117 Lisp_Object obj;
118 {
119 if (CHAR_TABLE_P (obj)
120 && XCHAR_TABLE (obj)->purpose == Qsyntax_table)
121 return Qt;
122 return Qnil;
123 }
124
125 static void
126 check_syntax_table (obj)
127 Lisp_Object obj;
128 {
129 CHECK_CHAR_TABLE (obj, 0);
130 }
131
132
133 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
134 "Return the current syntax table.\n\
135 This is the one specified by the current buffer.")
136 ()
137 {
138 return current_buffer->syntax_table;
139 }
140
141 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
142 Sstandard_syntax_table, 0, 0, 0,
143 "Return the standard syntax table.\n\
144 This is the one used for new buffers.")
145 ()
146 {
147 return Vstandard_syntax_table;
148 }
149
150 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
151 "Construct a new syntax table and return it.\n\
152 It is a copy of the TABLE, which defaults to the standard syntax table.")
153 (table)
154 Lisp_Object table;
155 {
156 Lisp_Object copy;
157
158 if (!NILP (table))
159 check_syntax_table (table);
160 else
161 table = Vstandard_syntax_table;
162
163 copy = Fcopy_sequence (table);
164 Fset_char_table_parent (copy, Vstandard_syntax_table);
165 return copy;
166 }
167
168 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
169 "Select a new syntax table for the current buffer.\n\
170 One argument, a syntax table.")
171 (table)
172 Lisp_Object table;
173 {
174 check_syntax_table (table);
175 current_buffer->syntax_table = table;
176 /* Indicate that this buffer now has a specified syntax table. */
177 current_buffer->local_var_flags
178 |= XFASTINT (buffer_local_flags.syntax_table);
179 return table;
180 }
181 \f
182 /* Convert a letter which signifies a syntax code
183 into the code it signifies.
184 This is used by modify-syntax-entry, and other things. */
185
186 unsigned char syntax_spec_code[0400] =
187 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
188 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
189 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 (char) Swhitespace, 0377, (char) Sstring, 0377,
192 (char) Smath, 0377, 0377, (char) Squote,
193 (char) Sopen, (char) Sclose, 0377, 0377,
194 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
195 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
196 0377, 0377, 0377, 0377,
197 (char) Scomment, 0377, (char) Sendcomment, 0377,
198 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
199 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
200 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
201 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
203 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
206 };
207
208 /* Indexed by syntax code, give the letter that describes it. */
209
210 char syntax_code_spec[14] =
211 {
212 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
213 };
214 \f
215 /* Look up the value for CHARACTER in syntax table TABLE's parent
216 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
217 for CHARACTER. It's actually used only when not compiled with GCC. */
218
219 Lisp_Object
220 syntax_parent_lookup (table, character)
221 Lisp_Object table;
222 int character;
223 {
224 Lisp_Object value;
225
226 while (1)
227 {
228 table = XCHAR_TABLE (table)->parent;
229 if (NILP (table))
230 return Qnil;
231
232 value = XCHAR_TABLE (table)->contents[character];
233 if (!NILP (value))
234 return value;
235 }
236 }
237
238 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
239 "Return the syntax code of CHAR, described by a character.\n\
240 For example, if CHAR is a word constituent, the character `?w' is returned.\n\
241 The characters that correspond to various syntax codes\n\
242 are listed in the documentation of `modify-syntax-entry'.")
243 (ch)
244 Lisp_Object ch;
245 {
246 int char_int;
247 CHECK_NUMBER (ch, 0);
248 char_int = XINT (ch);
249 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
250 }
251
252 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
253 "Return the matching parenthesis of CHAR, or nil if none.")
254 (ch)
255 Lisp_Object ch;
256 {
257 int char_int, code;
258 CHECK_NUMBER (ch, 0);
259 char_int = XINT (ch);
260 code = SYNTAX (char_int);
261 if (code == Sopen || code == Sclose)
262 return make_number (SYNTAX_MATCH (char_int));
263 return Qnil;
264 }
265
266 /* This comment supplies the doc string for modify-syntax-entry,
267 for make-docfile to see. We cannot put this in the real DEFUN
268 due to limits in the Unix cpp.
269
270 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
271 "Set syntax for character CHAR according to string S.\n\
272 The syntax is changed only for table TABLE, which defaults to\n\
273 the current buffer's syntax table.\n\
274 The first character of S should be one of the following:\n\
275 Space or - whitespace syntax. w word constituent.\n\
276 _ symbol constituent. . punctuation.\n\
277 ( open-parenthesis. ) close-parenthesis.\n\
278 \" string quote. \\ escape.\n\
279 $ paired delimiter. ' expression quote or prefix operator.\n\
280 < comment starter. > comment ender.\n\
281 / character-quote. @ inherit from `standard-syntax-table'.\n\
282 \n\
283 Only single-character comment start and end sequences are represented thus.\n\
284 Two-character sequences are represented as described below.\n\
285 The second character of S is the matching parenthesis,\n\
286 used only if the first character is `(' or `)'.\n\
287 Any additional characters are flags.\n\
288 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
289 1 means C is the start of a two-char comment start sequence.\n\
290 2 means C is the second character of such a sequence.\n\
291 3 means C is the start of a two-char comment end sequence.\n\
292 4 means C is the second character of such a sequence.\n\
293 \n\
294 There can be up to two orthogonal comment sequences. This is to support\n\
295 language modes such as C++. By default, all comment sequences are of style\n\
296 a, but you can set the comment sequence style to b (on the second character\n\
297 of a comment-start, or the first character of a comment-end sequence) using\n\
298 this flag:\n\
299 b means C is part of comment sequence b.\n\
300 \n\
301 p means C is a prefix character for `backward-prefix-chars';\n\
302 such characters are treated as whitespace when they occur\n\
303 between expressions.")
304 (char, s, table)
305 */
306
307 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
308 /* I really don't know why this is interactive
309 help-form should at least be made useful whilst reading the second arg
310 */
311 "cSet syntax for character: \nsSet syntax for %s to: ",
312 0 /* See immediately above */)
313 (c, newentry, syntax_table)
314 Lisp_Object c, newentry, syntax_table;
315 {
316 register unsigned char *p;
317 register enum syntaxcode code;
318 int val;
319 Lisp_Object match;
320
321 CHECK_NUMBER (c, 0);
322 CHECK_STRING (newentry, 1);
323
324 if (NILP (syntax_table))
325 syntax_table = current_buffer->syntax_table;
326 else
327 check_syntax_table (syntax_table);
328
329 p = XSTRING (newentry)->data;
330 code = (enum syntaxcode) syntax_spec_code[*p++];
331 if (((int) code & 0377) == 0377)
332 error ("invalid syntax description letter: %c", c);
333
334 if (code == Sinherit)
335 {
336 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
337 return Qnil;
338 }
339
340 if (*p)
341 XSETINT (match, *p++);
342 if (XFASTINT (match) == ' ')
343 match = Qnil;
344
345 val = (int) code;
346 while (*p)
347 switch (*p++)
348 {
349 case '1':
350 val |= 1 << 16;
351 break;
352
353 case '2':
354 val |= 1 << 17;
355 break;
356
357 case '3':
358 val |= 1 << 18;
359 break;
360
361 case '4':
362 val |= 1 << 19;
363 break;
364
365 case 'p':
366 val |= 1 << 20;
367 break;
368
369 case 'b':
370 val |= 1 << 21;
371 break;
372 }
373
374 SET_RAW_SYNTAX_ENTRY (syntax_table, c,
375 Fcons (make_number (val), match));
376
377 return Qnil;
378 }
379 \f
380 /* Dump syntax table to buffer in human-readable format */
381
382 static void
383 describe_syntax (value)
384 Lisp_Object value;
385 {
386 register enum syntaxcode code;
387 char desc, match, start1, start2, end1, end2, prefix, comstyle;
388 char str[2];
389 Lisp_Object first, match_lisp;
390
391 Findent_to (make_number (16), make_number (1));
392
393 if (NILP (value))
394 {
395 insert_string ("inherit");
396 return;
397 }
398
399 if (!CONSP (value))
400 {
401 insert_string ("invalid");
402 return;
403 }
404
405 first = XCONS (value)->car;
406 match_lisp = XCONS (value)->cdr;
407
408 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
409 {
410 insert_string ("invalid");
411 return;
412 }
413
414 code = (enum syntaxcode) (first & 0377);
415 start1 = (XINT (first) >> 16) & 1;
416 start2 = (XINT (first) >> 17) & 1;
417 end1 = (XINT (first) >> 18) & 1;
418 end2 = (XINT (first) >> 19) & 1;
419 prefix = (XINT (first) >> 20) & 1;
420 comstyle = (XINT (first) >> 21) & 1;
421
422 if ((int) code < 0 || (int) code >= (int) Smax)
423 {
424 insert_string ("invalid");
425 return;
426 }
427 desc = syntax_code_spec[(int) code];
428
429 str[0] = desc, str[1] = 0;
430 insert (str, 1);
431
432 str[0] = !NILP (match_lisp) ? XINT (match_lisp) : ' ';
433 insert (str, 1);
434
435 if (start1)
436 insert ("1", 1);
437 if (start2)
438 insert ("2", 1);
439
440 if (end1)
441 insert ("3", 1);
442 if (end2)
443 insert ("4", 1);
444
445 if (prefix)
446 insert ("p", 1);
447 if (comstyle)
448 insert ("b", 1);
449
450 insert_string ("\twhich means: ");
451
452 switch (SWITCH_ENUM_CAST (code))
453 {
454 case Swhitespace:
455 insert_string ("whitespace"); break;
456 case Spunct:
457 insert_string ("punctuation"); break;
458 case Sword:
459 insert_string ("word"); break;
460 case Ssymbol:
461 insert_string ("symbol"); break;
462 case Sopen:
463 insert_string ("open"); break;
464 case Sclose:
465 insert_string ("close"); break;
466 case Squote:
467 insert_string ("quote"); break;
468 case Sstring:
469 insert_string ("string"); break;
470 case Smath:
471 insert_string ("math"); break;
472 case Sescape:
473 insert_string ("escape"); break;
474 case Scharquote:
475 insert_string ("charquote"); break;
476 case Scomment:
477 insert_string ("comment"); break;
478 case Sendcomment:
479 insert_string ("endcomment"); break;
480 default:
481 insert_string ("invalid");
482 return;
483 }
484
485 if (!NILP (match_lisp))
486 {
487 insert_string (", matches ");
488 insert_char (XINT (match_lisp));
489 }
490
491 if (start1)
492 insert_string (",\n\t is the first character of a comment-start sequence");
493 if (start2)
494 insert_string (",\n\t is the second character of a comment-start sequence");
495
496 if (end1)
497 insert_string (",\n\t is the first character of a comment-end sequence");
498 if (end2)
499 insert_string (",\n\t is the second character of a comment-end sequence");
500 if (comstyle)
501 insert_string (" (comment style b)");
502
503 if (prefix)
504 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
505
506 insert_string ("\n");
507 }
508
509 static Lisp_Object
510 describe_syntax_1 (vector)
511 Lisp_Object vector;
512 {
513 struct buffer *old = current_buffer;
514 set_buffer_internal (XBUFFER (Vstandard_output));
515 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
516 call0 (intern ("help-mode"));
517 set_buffer_internal (old);
518 return Qnil;
519 }
520
521 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
522 "Describe the syntax specifications in the syntax table.\n\
523 The descriptions are inserted in a buffer, which is then displayed.")
524 ()
525 {
526 internal_with_output_to_temp_buffer
527 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
528
529 return Qnil;
530 }
531 \f
532 /* Return the position across COUNT words from FROM.
533 If that many words cannot be found before the end of the buffer, return 0.
534 COUNT negative means scan backward and stop at word beginning. */
535
536 scan_words (from, count)
537 register int from, count;
538 {
539 register int beg = BEGV;
540 register int end = ZV;
541 register int code;
542 int charcode;
543
544 immediate_quit = 1;
545 QUIT;
546
547 while (count > 0)
548 {
549 while (1)
550 {
551 if (from == end)
552 {
553 immediate_quit = 0;
554 return 0;
555 }
556 charcode = FETCH_CHAR (from);
557 code = SYNTAX (charcode);
558 if (words_include_escapes
559 && (code == Sescape || code == Scharquote))
560 break;
561 if (code == Sword)
562 break;
563 from++;
564 }
565 while (1)
566 {
567 if (from == end) break;
568 charcode = FETCH_CHAR (from);
569 code = SYNTAX (charcode);
570 if (!(words_include_escapes
571 && (code == Sescape || code == Scharquote)))
572 if (code != Sword)
573 break;
574 from++;
575 }
576 count--;
577 }
578 while (count < 0)
579 {
580 while (1)
581 {
582 if (from == beg)
583 {
584 immediate_quit = 0;
585 return 0;
586 }
587 charcode = FETCH_CHAR (from - 1);
588 code = SYNTAX (charcode);
589 if (words_include_escapes
590 && (code == Sescape || code == Scharquote))
591 break;
592 if (code == Sword)
593 break;
594 from--;
595 }
596 while (1)
597 {
598 if (from == beg) break;
599 charcode = FETCH_CHAR (from - 1);
600 code = SYNTAX (charcode);
601 if (!(words_include_escapes
602 && (code == Sescape || code == Scharquote)))
603 if (code != Sword)
604 break;
605 from--;
606 }
607 count++;
608 }
609
610 immediate_quit = 0;
611
612 return from;
613 }
614
615 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
616 "Move point forward ARG words (backward if ARG is negative).\n\
617 Normally returns t.\n\
618 If an edge of the buffer is reached, point is left there\n\
619 and nil is returned.")
620 (count)
621 Lisp_Object count;
622 {
623 int val;
624 CHECK_NUMBER (count, 0);
625
626 if (!(val = scan_words (point, XINT (count))))
627 {
628 SET_PT (XINT (count) > 0 ? ZV : BEGV);
629 return Qnil;
630 }
631 SET_PT (val);
632 return Qt;
633 }
634 \f
635 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
636 "Move forward across up to N comments. If N is negative, move backward.\n\
637 Stop scanning if we find something other than a comment or whitespace.\n\
638 Set point to where scanning stops.\n\
639 If N comments are found as expected, with nothing except whitespace\n\
640 between them, return t; otherwise return nil.")
641 (count)
642 Lisp_Object count;
643 {
644 register int from;
645 register int stop;
646 register int c, c1;
647 register enum syntaxcode code;
648 int comstyle = 0; /* style of comment encountered */
649 int found;
650 int count1;
651
652 CHECK_NUMBER (count, 0);
653 count1 = XINT (count);
654
655 immediate_quit = 1;
656 QUIT;
657
658 from = PT;
659
660 while (count1 > 0)
661 {
662 stop = ZV;
663 do
664 {
665 if (from == stop)
666 {
667 SET_PT (from);
668 immediate_quit = 0;
669 return Qnil;
670 }
671 c = FETCH_CHAR (from);
672 code = SYNTAX (c);
673 from++;
674 comstyle = 0;
675 if (from < stop && SYNTAX_COMSTART_FIRST (c)
676 && (c1 = FETCH_CHAR (from),
677 SYNTAX_COMSTART_SECOND (c1)))
678 {
679 /* We have encountered a comment start sequence and we
680 are ignoring all text inside comments. We must record
681 the comment style this sequence begins so that later,
682 only a comment end of the same style actually ends
683 the comment section. */
684 code = Scomment;
685 comstyle = SYNTAX_COMMENT_STYLE (c1);
686 from++;
687 }
688 }
689 while (code == Swhitespace || code == Sendcomment);
690 if (code != Scomment)
691 {
692 immediate_quit = 0;
693 SET_PT (from - 1);
694 return Qnil;
695 }
696 /* We're at the start of a comment. */
697 while (1)
698 {
699 if (from == stop)
700 {
701 immediate_quit = 0;
702 SET_PT (from);
703 return Qnil;
704 }
705 c = FETCH_CHAR (from);
706 from++;
707 if (SYNTAX (c) == Sendcomment
708 && SYNTAX_COMMENT_STYLE (c) == comstyle)
709 /* we have encountered a comment end of the same style
710 as the comment sequence which began this comment
711 section */
712 break;
713 if (from < stop && SYNTAX_COMEND_FIRST (c)
714 && (c1 = FETCH_CHAR (from),
715 SYNTAX_COMEND_SECOND (c1))
716 && SYNTAX_COMMENT_STYLE (c) == comstyle)
717 /* we have encountered a comment end of the same style
718 as the comment sequence which began this comment
719 section */
720 { from++; break; }
721 }
722 /* We have skipped one comment. */
723 count1--;
724 }
725
726 while (count1 < 0)
727 {
728 stop = BEGV;
729 while (from > stop)
730 {
731 int quoted;
732
733 from--;
734 quoted = char_quoted (from);
735 if (quoted)
736 from--;
737 c = FETCH_CHAR (from);
738 code = SYNTAX (c);
739 comstyle = 0;
740 if (code == Sendcomment)
741 comstyle = SYNTAX_COMMENT_STYLE (c);
742 if (from > stop && SYNTAX_COMEND_SECOND (c)
743 && (c1 = FETCH_CHAR (from - 1),
744 SYNTAX_COMEND_FIRST (c1))
745 && !char_quoted (from - 1))
746 {
747 /* We must record the comment style encountered so that
748 later, we can match only the proper comment begin
749 sequence of the same style. */
750 code = Sendcomment;
751 comstyle = SYNTAX_COMMENT_STYLE (c1);
752 from--;
753 }
754
755 if (code == Sendcomment && !quoted)
756 {
757 #if 0
758 if (code != SYNTAX (c))
759 /* For a two-char comment ender, we can assume
760 it does end a comment. So scan back in a simple way. */
761 {
762 if (from != stop) from--;
763 while (1)
764 {
765 if ((c = FETCH_CHAR (from),
766 SYNTAX (c) == Scomment)
767 && SYNTAX_COMMENT_STYLE (c) == comstyle)
768 break;
769 if (from == stop)
770 {
771 immediate_quit = 0;
772 SET_PT (from);
773 return Qnil;
774 }
775 from--;
776 if (SYNTAX_COMSTART_SECOND (c)
777 && (c1 = FETCH_CHAR (from),
778 SYNTAX_COMSTART_FIRST (c1))
779 && SYNTAX_COMMENT_STYLE (c) == comstyle
780 && !char_quoted (from))
781 break;
782 }
783 break;
784 }
785 #endif /* 0 */
786
787 /* Look back, counting the parity of string-quotes,
788 and recording the comment-starters seen.
789 When we reach a safe place, assume that's not in a string;
790 then step the main scan to the earliest comment-starter seen
791 an even number of string quotes away from the safe place.
792
793 OFROM[I] is position of the earliest comment-starter seen
794 which is I+2X quotes from the comment-end.
795 PARITY is current parity of quotes from the comment end. */
796 {
797 int parity = 0;
798 char my_stringend = 0;
799 int string_lossage = 0;
800 int comment_end = from;
801 int comstart_pos = 0;
802 int comstart_parity = 0;
803 int scanstart = from - 1;
804
805 /* At beginning of range to scan, we're outside of strings;
806 that determines quote parity to the comment-end. */
807 while (from != stop)
808 {
809 /* Move back and examine a character. */
810 from--;
811
812 c = FETCH_CHAR (from);
813 code = SYNTAX (c);
814
815 /* If this char is the second of a 2-char comment sequence,
816 back up and give the pair the appropriate syntax. */
817 if (from > stop && SYNTAX_COMEND_SECOND (c)
818 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
819 {
820 code = Sendcomment;
821 from--;
822 c = FETCH_CHAR (from);
823 }
824
825 /* If this char starts a 2-char comment start sequence,
826 treat it like a 1-char comment starter. */
827 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
828 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
829 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
830 code = Scomment;
831
832 /* Ignore escaped characters. */
833 if (char_quoted (from))
834 continue;
835
836 /* Track parity of quotes. */
837 if (code == Sstring)
838 {
839 parity ^= 1;
840 if (my_stringend == 0)
841 my_stringend = c;
842 /* If we have two kinds of string delimiters.
843 There's no way to grok this scanning backwards. */
844 else if (my_stringend != c)
845 string_lossage = 1;
846 }
847
848 /* Record comment-starters according to that
849 quote-parity to the comment-end. */
850 if (code == Scomment)
851 {
852 comstart_parity = parity;
853 comstart_pos = from;
854 }
855
856 /* If we find another earlier comment-ender,
857 any comment-starts earlier than that don't count
858 (because they go with the earlier comment-ender). */
859 if (code == Sendcomment
860 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
861 break;
862
863 /* Assume a defun-start point is outside of strings. */
864 if (code == Sopen
865 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
866 break;
867 }
868
869 if (comstart_pos == 0)
870 from = comment_end;
871 /* If the earliest comment starter
872 is followed by uniform paired string quotes or none,
873 we know it can't be inside a string
874 since if it were then the comment ender would be inside one.
875 So it does start a comment. Skip back to it. */
876 else if (comstart_parity == 0 && !string_lossage)
877 from = comstart_pos;
878 else
879 {
880 /* We had two kinds of string delimiters mixed up
881 together. Decode this going forwards.
882 Scan fwd from the previous comment ender
883 to the one in question; this records where we
884 last passed a comment starter. */
885 struct lisp_parse_state state;
886 scan_sexps_forward (&state, find_defun_start (comment_end),
887 comment_end - 1, -10000, 0, Qnil, 0);
888 if (state.incomment)
889 from = state.comstart;
890 else
891 /* We can't grok this as a comment; scan it normally. */
892 from = comment_end;
893 }
894 }
895 /* We have skipped one comment. */
896 break;
897 }
898 else if ((code != Swhitespace && code != Scomment) || quoted)
899 {
900 immediate_quit = 0;
901 SET_PT (from + 1);
902 return Qnil;
903 }
904 }
905
906 count1++;
907 }
908
909 SET_PT (from);
910 immediate_quit = 0;
911 return Qt;
912 }
913 \f
914 int parse_sexp_ignore_comments;
915
916 Lisp_Object
917 scan_lists (from, count, depth, sexpflag)
918 register int from;
919 int count, depth, sexpflag;
920 {
921 Lisp_Object val;
922 register int stop;
923 register int c;
924 unsigned char stringterm;
925 int quoted;
926 int mathexit = 0;
927 register enum syntaxcode code;
928 int min_depth = depth; /* Err out if depth gets less than this. */
929 int comstyle = 0; /* style of comment encountered */
930
931 if (depth > 0) min_depth = 0;
932
933 immediate_quit = 1;
934 QUIT;
935
936 while (count > 0)
937 {
938 stop = ZV;
939 while (from < stop)
940 {
941 c = FETCH_CHAR (from);
942 code = SYNTAX (c);
943 from++;
944 if (from < stop && SYNTAX_COMSTART_FIRST (c)
945 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
946 && parse_sexp_ignore_comments)
947 {
948 /* we have encountered a comment start sequence and we
949 are ignoring all text inside comments. we must record
950 the comment style this sequence begins so that later,
951 only a comment end of the same style actually ends
952 the comment section */
953 code = Scomment;
954 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
955 from++;
956 }
957
958 if (SYNTAX_PREFIX (c))
959 continue;
960
961 switch (SWITCH_ENUM_CAST (code))
962 {
963 case Sescape:
964 case Scharquote:
965 if (from == stop) goto lose;
966 from++;
967 /* treat following character as a word constituent */
968 case Sword:
969 case Ssymbol:
970 if (depth || !sexpflag) break;
971 /* This word counts as a sexp; return at end of it. */
972 while (from < stop)
973 {
974 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
975 {
976 case Scharquote:
977 case Sescape:
978 from++;
979 if (from == stop) goto lose;
980 break;
981 case Sword:
982 case Ssymbol:
983 case Squote:
984 break;
985 default:
986 goto done;
987 }
988 from++;
989 }
990 goto done;
991
992 case Scomment:
993 if (!parse_sexp_ignore_comments) break;
994 while (1)
995 {
996 if (from == stop)
997 {
998 if (depth == 0)
999 goto done;
1000 goto lose;
1001 }
1002 c = FETCH_CHAR (from);
1003 if (SYNTAX (c) == Sendcomment
1004 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1005 /* we have encountered a comment end of the same style
1006 as the comment sequence which began this comment
1007 section */
1008 break;
1009 from++;
1010 if (from < stop && SYNTAX_COMEND_FIRST (c)
1011 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1012 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1013 /* we have encountered a comment end of the same style
1014 as the comment sequence which began this comment
1015 section */
1016 { from++; break; }
1017 }
1018 break;
1019
1020 case Smath:
1021 if (!sexpflag)
1022 break;
1023 if (from != stop && c == FETCH_CHAR (from))
1024 from++;
1025 if (mathexit)
1026 {
1027 mathexit = 0;
1028 goto close1;
1029 }
1030 mathexit = 1;
1031
1032 case Sopen:
1033 if (!++depth) goto done;
1034 break;
1035
1036 case Sclose:
1037 close1:
1038 if (!--depth) goto done;
1039 if (depth < min_depth)
1040 error ("Containing expression ends prematurely");
1041 break;
1042
1043 case Sstring:
1044 stringterm = FETCH_CHAR (from - 1);
1045 while (1)
1046 {
1047 if (from >= stop) goto lose;
1048 if (FETCH_CHAR (from) == stringterm) break;
1049 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1050 {
1051 case Scharquote:
1052 case Sescape:
1053 from++;
1054 }
1055 from++;
1056 }
1057 from++;
1058 if (!depth && sexpflag) goto done;
1059 break;
1060 }
1061 }
1062
1063 /* Reached end of buffer. Error if within object, return nil if between */
1064 if (depth) goto lose;
1065
1066 immediate_quit = 0;
1067 return Qnil;
1068
1069 /* End of object reached */
1070 done:
1071 count--;
1072 }
1073
1074 while (count < 0)
1075 {
1076 stop = BEGV;
1077 while (from > stop)
1078 {
1079 from--;
1080 if (quoted = char_quoted (from))
1081 from--;
1082 c = FETCH_CHAR (from);
1083 code = SYNTAX (c);
1084 comstyle = 0;
1085 if (code == Sendcomment)
1086 comstyle = SYNTAX_COMMENT_STYLE (c);
1087 if (from > stop && SYNTAX_COMEND_SECOND (c)
1088 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1089 && !char_quoted (from - 1)
1090 && parse_sexp_ignore_comments)
1091 {
1092 /* we must record the comment style encountered so that
1093 later, we can match only the proper comment begin
1094 sequence of the same style */
1095 code = Sendcomment;
1096 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1097 from--;
1098 }
1099
1100 if (SYNTAX_PREFIX (c))
1101 continue;
1102
1103 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1104 {
1105 case Sword:
1106 case Ssymbol:
1107 if (depth || !sexpflag) break;
1108 /* This word counts as a sexp; count object finished after passing it. */
1109 while (from > stop)
1110 {
1111 quoted = char_quoted (from - 1);
1112 if (quoted)
1113 from--;
1114 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1115 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1116 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
1117 goto done2;
1118 from--;
1119 }
1120 goto done2;
1121
1122 case Smath:
1123 if (!sexpflag)
1124 break;
1125 if (from != stop && c == FETCH_CHAR (from - 1))
1126 from--;
1127 if (mathexit)
1128 {
1129 mathexit = 0;
1130 goto open2;
1131 }
1132 mathexit = 1;
1133
1134 case Sclose:
1135 if (!++depth) goto done2;
1136 break;
1137
1138 case Sopen:
1139 open2:
1140 if (!--depth) goto done2;
1141 if (depth < min_depth)
1142 error ("Containing expression ends prematurely");
1143 break;
1144
1145 case Sendcomment:
1146 if (!parse_sexp_ignore_comments)
1147 break;
1148 #if 0
1149 if (code != SYNTAX (c))
1150 /* For a two-char comment ender, we can assume
1151 it does end a comment. So scan back in a simple way. */
1152 {
1153 if (from != stop) from--;
1154 while (1)
1155 {
1156 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1157 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1158 break;
1159 if (from == stop)
1160 {
1161 if (depth == 0)
1162 goto done2;
1163 goto lose;
1164 }
1165 from--;
1166 if (SYNTAX_COMSTART_SECOND (c)
1167 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1168 && SYNTAX_COMMENT_STYLE (c) == comstyle
1169 && !char_quoted (from))
1170 break;
1171 }
1172 break;
1173 }
1174 #endif /* 0 */
1175
1176 /* Look back, counting the parity of string-quotes,
1177 and recording the comment-starters seen.
1178 When we reach a safe place, assume that's not in a string;
1179 then step the main scan to the earliest comment-starter seen
1180 an even number of string quotes away from the safe place.
1181
1182 OFROM[I] is position of the earliest comment-starter seen
1183 which is I+2X quotes from the comment-end.
1184 PARITY is current parity of quotes from the comment end. */
1185 {
1186 int parity = 0;
1187 char my_stringend = 0;
1188 int string_lossage = 0;
1189 int comment_end = from;
1190 int comstart_pos = 0;
1191 int comstart_parity = 0;
1192 int scanstart = from - 1;
1193
1194 /* At beginning of range to scan, we're outside of strings;
1195 that determines quote parity to the comment-end. */
1196 while (from != stop)
1197 {
1198 /* Move back and examine a character. */
1199 from--;
1200
1201 c = FETCH_CHAR (from);
1202 code = SYNTAX (c);
1203
1204 /* If this char is the second of a 2-char comment sequence,
1205 back up and give the pair the appropriate syntax. */
1206 if (from > stop && SYNTAX_COMEND_SECOND (c)
1207 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
1208 {
1209 code = Sendcomment;
1210 from--;
1211 c = FETCH_CHAR (from);
1212 }
1213
1214 /* If this char starts a 2-char comment start sequence,
1215 treat it like a 1-char comment starter. */
1216 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1217 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
1218 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
1219 code = Scomment;
1220
1221 /* Ignore escaped characters. */
1222 if (char_quoted (from))
1223 continue;
1224
1225 /* Track parity of quotes. */
1226 if (code == Sstring)
1227 {
1228 parity ^= 1;
1229 if (my_stringend == 0)
1230 my_stringend = c;
1231 /* If we have two kinds of string delimiters.
1232 There's no way to grok this scanning backwards. */
1233 else if (my_stringend != c)
1234 string_lossage = 1;
1235 }
1236
1237 /* Record comment-starters according to that
1238 quote-parity to the comment-end. */
1239 if (code == Scomment)
1240 {
1241 comstart_parity = parity;
1242 comstart_pos = from;
1243 }
1244
1245 /* If we find another earlier comment-ender,
1246 any comment-starts earlier than that don't count
1247 (because they go with the earlier comment-ender). */
1248 if (code == Sendcomment
1249 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1250 break;
1251
1252 /* Assume a defun-start point is outside of strings. */
1253 if (code == Sopen
1254 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1255 break;
1256 }
1257
1258 if (comstart_pos == 0)
1259 from = comment_end;
1260 /* If the earliest comment starter
1261 is followed by uniform paired string quotes or none,
1262 we know it can't be inside a string
1263 since if it were then the comment ender would be inside one.
1264 So it does start a comment. Skip back to it. */
1265 else if (comstart_parity == 0 && !string_lossage)
1266 from = comstart_pos;
1267 else
1268 {
1269 /* We had two kinds of string delimiters mixed up
1270 together. Decode this going forwards.
1271 Scan fwd from the previous comment ender
1272 to the one in question; this records where we
1273 last passed a comment starter. */
1274 struct lisp_parse_state state;
1275 scan_sexps_forward (&state, find_defun_start (comment_end),
1276 comment_end - 1, -10000, 0, Qnil, 0);
1277 if (state.incomment)
1278 from = state.comstart;
1279 else
1280 /* We can't grok this as a comment; scan it normally. */
1281 from = comment_end;
1282 }
1283 }
1284 break;
1285
1286 case Sstring:
1287 stringterm = FETCH_CHAR (from);
1288 while (1)
1289 {
1290 if (from == stop) goto lose;
1291 if (!char_quoted (from - 1)
1292 && stringterm == FETCH_CHAR (from - 1))
1293 break;
1294 from--;
1295 }
1296 from--;
1297 if (!depth && sexpflag) goto done2;
1298 break;
1299 }
1300 }
1301
1302 /* Reached start of buffer. Error if within object, return nil if between */
1303 if (depth) goto lose;
1304
1305 immediate_quit = 0;
1306 return Qnil;
1307
1308 done2:
1309 count++;
1310 }
1311
1312
1313 immediate_quit = 0;
1314 XSETFASTINT (val, from);
1315 return val;
1316
1317 lose:
1318 error ("Unbalanced parentheses");
1319 /* NOTREACHED */
1320 }
1321
1322 static int
1323 char_quoted (pos)
1324 register int pos;
1325 {
1326 register enum syntaxcode code;
1327 register int beg = BEGV;
1328 register int quoted = 0;
1329
1330 while (pos > beg
1331 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1332 || code == Sescape))
1333 pos--, quoted = !quoted;
1334 return quoted;
1335 }
1336
1337 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1338 "Scan from character number FROM by COUNT lists.\n\
1339 Returns the character number of the position thus found.\n\
1340 \n\
1341 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1342 only places where the depth in parentheses becomes zero\n\
1343 are candidates for stopping; COUNT such places are counted.\n\
1344 Thus, a positive value for DEPTH means go out levels.\n\
1345 \n\
1346 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1347 \n\
1348 If the beginning or end of (the accessible part of) the buffer is reached\n\
1349 and the depth is wrong, an error is signaled.\n\
1350 If the depth is right but the count is not used up, nil is returned.")
1351 (from, count, depth)
1352 Lisp_Object from, count, depth;
1353 {
1354 CHECK_NUMBER (from, 0);
1355 CHECK_NUMBER (count, 1);
1356 CHECK_NUMBER (depth, 2);
1357
1358 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1359 }
1360
1361 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1362 "Scan from character number FROM by COUNT balanced expressions.\n\
1363 If COUNT is negative, scan backwards.\n\
1364 Returns the character number of the position thus found.\n\
1365 \n\
1366 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1367 \n\
1368 If the beginning or end of (the accessible part of) the buffer is reached\n\
1369 in the middle of a parenthetical grouping, an error is signaled.\n\
1370 If the beginning or end is reached between groupings\n\
1371 but before count is used up, nil is returned.")
1372 (from, count)
1373 Lisp_Object from, count;
1374 {
1375 CHECK_NUMBER (from, 0);
1376 CHECK_NUMBER (count, 1);
1377
1378 return scan_lists (XINT (from), XINT (count), 0, 1);
1379 }
1380
1381 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1382 0, 0, 0,
1383 "Move point backward over any number of chars with prefix syntax.\n\
1384 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1385 ()
1386 {
1387 int beg = BEGV;
1388 int pos = point;
1389
1390 while (pos > beg && !char_quoted (pos - 1)
1391 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1392 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1393 pos--;
1394
1395 SET_PT (pos);
1396
1397 return Qnil;
1398 }
1399 \f
1400 /* Parse forward from FROM to END,
1401 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1402 and return a description of the state of the parse at END.
1403 If STOPBEFORE is nonzero, stop at the start of an atom.
1404 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1405
1406 static void
1407 scan_sexps_forward (stateptr, from, end, targetdepth,
1408 stopbefore, oldstate, commentstop)
1409 struct lisp_parse_state *stateptr;
1410 register int from;
1411 int end, targetdepth, stopbefore;
1412 Lisp_Object oldstate;
1413 int commentstop;
1414 {
1415 struct lisp_parse_state state;
1416
1417 register enum syntaxcode code;
1418 struct level { int last, prev; };
1419 struct level levelstart[100];
1420 register struct level *curlevel = levelstart;
1421 struct level *endlevel = levelstart + 100;
1422 char prev;
1423 register int depth; /* Paren depth of current scanning location.
1424 level - levelstart equals this except
1425 when the depth becomes negative. */
1426 int mindepth; /* Lowest DEPTH value seen. */
1427 int start_quoted = 0; /* Nonzero means starting after a char quote */
1428 Lisp_Object tem;
1429
1430 immediate_quit = 1;
1431 QUIT;
1432
1433 if (NILP (oldstate))
1434 {
1435 depth = 0;
1436 state.instring = -1;
1437 state.incomment = 0;
1438 state.comstyle = 0; /* comment style a by default */
1439 }
1440 else
1441 {
1442 tem = Fcar (oldstate);
1443 if (!NILP (tem))
1444 depth = XINT (tem);
1445 else
1446 depth = 0;
1447
1448 oldstate = Fcdr (oldstate);
1449 oldstate = Fcdr (oldstate);
1450 oldstate = Fcdr (oldstate);
1451 tem = Fcar (oldstate);
1452 state.instring = !NILP (tem) ? XINT (tem) : -1;
1453
1454 oldstate = Fcdr (oldstate);
1455 tem = Fcar (oldstate);
1456 state.incomment = !NILP (tem);
1457
1458 oldstate = Fcdr (oldstate);
1459 tem = Fcar (oldstate);
1460 start_quoted = !NILP (tem);
1461
1462 /* if the eight element of the list is nil, we are in comment
1463 style a. if it is non-nil, we are in comment style b */
1464 oldstate = Fcdr (oldstate);
1465 oldstate = Fcdr (oldstate);
1466 tem = Fcar (oldstate);
1467 state.comstyle = !NILP (tem);
1468 }
1469 state.quoted = 0;
1470 mindepth = depth;
1471
1472 curlevel->prev = -1;
1473 curlevel->last = -1;
1474
1475 /* Enter the loop at a place appropriate for initial state. */
1476
1477 if (state.incomment) goto startincomment;
1478 if (state.instring >= 0)
1479 {
1480 if (start_quoted) goto startquotedinstring;
1481 goto startinstring;
1482 }
1483 if (start_quoted) goto startquoted;
1484
1485 while (from < end)
1486 {
1487 code = SYNTAX (FETCH_CHAR (from));
1488 from++;
1489 if (code == Scomment)
1490 state.comstart = from-1;
1491
1492 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1493 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
1494 {
1495 /* Record the comment style we have entered so that only
1496 the comment-end sequence of the same style actually
1497 terminates the comment section. */
1498 code = Scomment;
1499 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1500 state.comstart = from-1;
1501 from++;
1502 }
1503
1504 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1505 continue;
1506 switch (SWITCH_ENUM_CAST (code))
1507 {
1508 case Sescape:
1509 case Scharquote:
1510 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1511 curlevel->last = from - 1;
1512 startquoted:
1513 if (from == end) goto endquoted;
1514 from++;
1515 goto symstarted;
1516 /* treat following character as a word constituent */
1517 case Sword:
1518 case Ssymbol:
1519 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1520 curlevel->last = from - 1;
1521 symstarted:
1522 while (from < end)
1523 {
1524 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1525 {
1526 case Scharquote:
1527 case Sescape:
1528 from++;
1529 if (from == end) goto endquoted;
1530 break;
1531 case Sword:
1532 case Ssymbol:
1533 case Squote:
1534 break;
1535 default:
1536 goto symdone;
1537 }
1538 from++;
1539 }
1540 symdone:
1541 curlevel->prev = curlevel->last;
1542 break;
1543
1544 startincomment:
1545 if (commentstop)
1546 goto done;
1547 if (from != BEGV)
1548 {
1549 /* Enter the loop in the middle so that we find
1550 a 2-char comment ender if we start in the middle of it. */
1551 prev = FETCH_CHAR (from - 1);
1552 goto startincomment_1;
1553 }
1554 /* At beginning of buffer, enter the loop the ordinary way. */
1555
1556 case Scomment:
1557 state.incomment = 1;
1558 if (commentstop)
1559 goto done;
1560 while (1)
1561 {
1562 if (from == end) goto done;
1563 prev = FETCH_CHAR (from);
1564 if (SYNTAX (prev) == Sendcomment
1565 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1566 /* Only terminate the comment section if the endcomment
1567 of the same style as the start sequence has been
1568 encountered. */
1569 break;
1570 from++;
1571 startincomment_1:
1572 if (from < end && SYNTAX_COMEND_FIRST (prev)
1573 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1574 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1575 /* Only terminate the comment section if the end-comment
1576 sequence of the same style as the start sequence has
1577 been encountered. */
1578 { from++; break; }
1579 }
1580 state.incomment = 0;
1581 state.comstyle = 0; /* reset the comment style */
1582 break;
1583
1584 case Sopen:
1585 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1586 depth++;
1587 /* curlevel++->last ran into compiler bug on Apollo */
1588 curlevel->last = from - 1;
1589 if (++curlevel == endlevel)
1590 error ("Nesting too deep for parser");
1591 curlevel->prev = -1;
1592 curlevel->last = -1;
1593 if (targetdepth == depth) goto done;
1594 break;
1595
1596 case Sclose:
1597 depth--;
1598 if (depth < mindepth)
1599 mindepth = depth;
1600 if (curlevel != levelstart)
1601 curlevel--;
1602 curlevel->prev = curlevel->last;
1603 if (targetdepth == depth) goto done;
1604 break;
1605
1606 case Sstring:
1607 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1608 curlevel->last = from - 1;
1609 state.instring = FETCH_CHAR (from - 1);
1610 startinstring:
1611 while (1)
1612 {
1613 if (from >= end) goto done;
1614 if (FETCH_CHAR (from) == state.instring) break;
1615 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1616 {
1617 case Scharquote:
1618 case Sescape:
1619 from++;
1620 startquotedinstring:
1621 if (from >= end) goto endquoted;
1622 }
1623 from++;
1624 }
1625 state.instring = -1;
1626 curlevel->prev = curlevel->last;
1627 from++;
1628 break;
1629
1630 case Smath:
1631 break;
1632 }
1633 }
1634 goto done;
1635
1636 stop: /* Here if stopping before start of sexp. */
1637 from--; /* We have just fetched the char that starts it; */
1638 goto done; /* but return the position before it. */
1639
1640 endquoted:
1641 state.quoted = 1;
1642 done:
1643 state.depth = depth;
1644 state.mindepth = mindepth;
1645 state.thislevelstart = curlevel->prev;
1646 state.prevlevelstart
1647 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1648 state.location = from;
1649 immediate_quit = 0;
1650
1651 *stateptr = state;
1652 }
1653
1654 /* This comment supplies the doc string for parse-partial-sexp,
1655 for make-docfile to see. We cannot put this in the real DEFUN
1656 due to limits in the Unix cpp.
1657
1658 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1659 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1660 Parsing stops at TO or when certain criteria are met;\n\
1661 point is set to where parsing stops.\n\
1662 If fifth arg STATE is omitted or nil,\n\
1663 parsing assumes that FROM is the beginning of a function.\n\
1664 Value is a list of eight elements describing final state of parsing:\n\
1665 0. depth in parens.\n\
1666 1. character address of start of innermost containing list; nil if none.\n\
1667 2. character address of start of last complete sexp terminated.\n\
1668 3. non-nil if inside a string.\n\
1669 (it is the character that will terminate the string.)\n\
1670 4. t if inside a comment.\n\
1671 5. t if following a quote character.\n\
1672 6. the minimum paren-depth encountered during this scan.\n\
1673 7. t if in a comment of style `b'.\n\
1674 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1675 in parentheses becomes equal to TARGETDEPTH.\n\
1676 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1677 any character that starts a sexp.\n\
1678 Fifth arg STATE is an eight-list like what this function returns.\n\
1679 It is used to initialize the state of the parse. Its second and third
1680 elements are ignored.
1681 Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1682 (from, to, targetdepth, stopbefore, state, commentstop)
1683 */
1684
1685 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
1686 0 /* See immediately above */)
1687 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1688 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
1689 {
1690 struct lisp_parse_state state;
1691 int target;
1692
1693 if (!NILP (targetdepth))
1694 {
1695 CHECK_NUMBER (targetdepth, 3);
1696 target = XINT (targetdepth);
1697 }
1698 else
1699 target = -100000; /* We won't reach this depth */
1700
1701 validate_region (&from, &to);
1702 scan_sexps_forward (&state, XINT (from), XINT (to),
1703 target, !NILP (stopbefore), oldstate,
1704 !NILP (commentstop));
1705
1706 SET_PT (state.location);
1707
1708 return Fcons (make_number (state.depth),
1709 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1710 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1711 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1712 Fcons (state.incomment ? Qt : Qnil,
1713 Fcons (state.quoted ? Qt : Qnil,
1714 Fcons (make_number (state.mindepth),
1715 Fcons (state.comstyle ? Qt : Qnil,
1716 Qnil))))))));
1717 }
1718 \f
1719 init_syntax_once ()
1720 {
1721 register int i;
1722 Lisp_Object temp;
1723
1724 /* This has to be done here, before we call Fmake_char_table. */
1725 Qsyntax_table = intern ("syntax-table");
1726 staticpro (&Qsyntax_table);
1727
1728 /* Intern this now in case it isn't already done.
1729 Setting this variable twice is harmless.
1730 But don't staticpro it here--that is done in alloc.c. */
1731 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1732
1733 /* Now we are ready to set up this property, so we can
1734 create syntax tables. */
1735 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
1736
1737 temp = Fcons (make_number ((int) Swhitespace), Qnil);
1738
1739 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
1740
1741 temp = Fcons (make_number ((int) Sword), Qnil);
1742 for (i = 'a'; i <= 'z'; i++)
1743 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1744 for (i = 'A'; i <= 'Z'; i++)
1745 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1746 for (i = '0'; i <= '9'; i++)
1747 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1748
1749 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
1750 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
1751
1752 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
1753 Fcons (make_number (Sopen), make_number (')')));
1754 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
1755 Fcons (make_number (Sclose), make_number ('(')));
1756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
1757 Fcons (make_number (Sopen), make_number (']')));
1758 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
1759 Fcons (make_number (Sclose), make_number ('[')));
1760 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
1761 Fcons (make_number (Sopen), make_number ('}')));
1762 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
1763 Fcons (make_number (Sclose), make_number ('{')));
1764 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
1765 Fcons (make_number ((int) Sstring), Qnil));
1766 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
1767 Fcons (make_number ((int) Sescape), Qnil));
1768
1769 temp = Fcons (make_number ((int) Ssymbol), Qnil);
1770 for (i = 0; i < 10; i++)
1771 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, "_-+*/&|<>="[i], temp);
1772
1773 temp = Fcons (make_number ((int) Spunct), Qnil);
1774 for (i = 0; i < 12; i++)
1775 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ".,;:?!#@~^'`"[i], temp);
1776 }
1777
1778 syms_of_syntax ()
1779 {
1780 Qsyntax_table_p = intern ("syntax-table-p");
1781 staticpro (&Qsyntax_table_p);
1782
1783 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1784 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1785
1786 words_include_escapes = 0;
1787 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1788 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1789
1790 defsubr (&Ssyntax_table_p);
1791 defsubr (&Ssyntax_table);
1792 defsubr (&Sstandard_syntax_table);
1793 defsubr (&Scopy_syntax_table);
1794 defsubr (&Sset_syntax_table);
1795 defsubr (&Schar_syntax);
1796 defsubr (&Smatching_paren);
1797 defsubr (&Smodify_syntax_entry);
1798 defsubr (&Sdescribe_syntax);
1799
1800 defsubr (&Sforward_word);
1801
1802 defsubr (&Sforward_comment);
1803 defsubr (&Sscan_lists);
1804 defsubr (&Sscan_sexps);
1805 defsubr (&Sbackward_prefix_chars);
1806 defsubr (&Sparse_partial_sexp);
1807 }