Update FSF's address in the preamble.
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "syntax.h"
28
29 Lisp_Object Qsyntax_table_p, Qsyntax_table;
30
31 static void scan_sexps_forward ();
32 static int char_quoted ();
33
34 int words_include_escapes;
35
36 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
37 if not compiled with GCC. No need to mark it, since it is used
38 only very temporarily. */
39 Lisp_Object syntax_temp;
40
/* Internal form of the parse state used in parse-partial-sexp.
   Fforward_comment also passes one of these to scan_sexps_forward and
   reads `incomment' and `comstart' back from it.  */

struct lisp_parse_state
  {
    /* Depth at end of parsing.  */
    int depth;

    /* -1 if not within string, else desired terminator.  */
    int instring;

    /* Nonzero if within a comment at end of parsing.  */
    int incomment;

    /* Comment style: a = 0, or b = 1.  */
    int comstyle;

    /* Nonzero if just after an escape char at end of parsing.  */
    int quoted;

    /* Char number of most recent start-of-expression at current level.  */
    int thislevelstart;

    /* Char number of start of containing expression.  */
    int prevlevelstart;

    /* Char number at which parsing stopped.  */
    int location;

    /* Minimum depth seen while scanning.  */
    int mindepth;

    /* Position just after last comment starter.  */
    int comstart;
  };
56 \f
/* Cache used by find_defun_start.  All five values are recorded
   together after each successful search and are consulted together
   before the next one.  */

/* The position for which the defun start was last looked up.  */
static int find_start_pos;

/* The defun start position that was found for find_start_pos.  */
static int find_start_value;

/* The buffer in which that search was made.  */
static struct buffer *find_start_buffer;

/* The value of BEGV at the time of that search.  */
static int find_start_begv;

/* The value of MODIFF at the time of that search.  */
static int find_start_modiff;
69
70 /* Find a defun-start that is the last one before POS (or nearly the last).
71 We record what we find, so that another call in the same area
72 can return the same value right away. */
73
74 static int
75 find_defun_start (pos)
76 int pos;
77 {
78 int tem;
79 int shortage;
80
81 /* Use previous finding, if it's valid and applies to this inquiry. */
82 if (current_buffer == find_start_buffer
83 /* Reuse the defun-start even if POS is a little farther on.
84 POS might be in the next defun, but that's ok.
85 Our value may not be the best possible, but will still be usable. */
86 && pos <= find_start_pos + 1000
87 && pos >= find_start_value
88 && BEGV == find_start_begv
89 && MODIFF == find_start_modiff)
90 return find_start_value;
91
92 /* Back up to start of line. */
93 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
94
95 while (tem > BEGV)
96 {
97 /* Open-paren at start of line means we found our defun-start. */
98 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
99 break;
100 /* Move to beg of previous line. */
101 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
102 }
103
104 /* Record what we found, for the next try. */
105 find_start_value = tem;
106 find_start_buffer = current_buffer;
107 find_start_modiff = MODIFF;
108 find_start_begv = BEGV;
109 find_start_pos = pos;
110
111 return find_start_value;
112 }
113 \f
114 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
115 "Return t if OBJECT is a syntax table.\n\
116 Currently, any char-table counts as a syntax table.")
117 (object)
118 Lisp_Object object;
119 {
120 if (CHAR_TABLE_P (object)
121 && XCHAR_TABLE (object)->purpose == Qsyntax_table)
122 return Qt;
123 return Qnil;
124 }
125
126 static void
127 check_syntax_table (obj)
128 Lisp_Object obj;
129 {
130 if (!(CHAR_TABLE_P (obj)
131 && XCHAR_TABLE (obj)->purpose == Qsyntax_table))
132 wrong_type_argument (Qsyntax_table_p, obj);
133 }
134
135 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
136 "Return the current syntax table.\n\
137 This is the one specified by the current buffer.")
138 ()
139 {
140 return current_buffer->syntax_table;
141 }
142
143 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
144 Sstandard_syntax_table, 0, 0, 0,
145 "Return the standard syntax table.\n\
146 This is the one used for new buffers.")
147 ()
148 {
149 return Vstandard_syntax_table;
150 }
151
152 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
153 "Construct a new syntax table and return it.\n\
154 It is a copy of the TABLE, which defaults to the standard syntax table.")
155 (table)
156 Lisp_Object table;
157 {
158 Lisp_Object copy;
159
160 if (!NILP (table))
161 check_syntax_table (table);
162 else
163 table = Vstandard_syntax_table;
164
165 copy = Fcopy_sequence (table);
166 Fset_char_table_parent (copy, Vstandard_syntax_table);
167 return copy;
168 }
169
170 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
171 "Select a new syntax table for the current buffer.\n\
172 One argument, a syntax table.")
173 (table)
174 Lisp_Object table;
175 {
176 check_syntax_table (table);
177 current_buffer->syntax_table = table;
178 /* Indicate that this buffer now has a specified syntax table. */
179 current_buffer->local_var_flags
180 |= XFASTINT (buffer_local_flags.syntax_table);
181 return table;
182 }
183 \f
184 /* Convert a letter which signifies a syntax code
185 into the code it signifies.
186 This is used by modify-syntax-entry, and other things. */
187
188 unsigned char syntax_spec_code[0400] =
189 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
193 (char) Swhitespace, 0377, (char) Sstring, 0377,
194 (char) Smath, 0377, 0377, (char) Squote,
195 (char) Sopen, (char) Sclose, 0377, 0377,
196 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
197 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
198 0377, 0377, 0377, 0377,
199 (char) Scomment, 0377, (char) Sendcomment, 0377,
200 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
203 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
207 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
208 };
209
/* Indexed by syntax code, give the letter that describes it.
   The name beside each letter is the code that syntax_spec_code
   assigns to that letter.  */

char syntax_code_spec[14] =
  {
    ' ',        /* Swhitespace */
    '.',        /* Spunct */
    'w',        /* Sword */
    '_',        /* Ssymbol */
    '(',        /* Sopen */
    ')',        /* Sclose */
    '\'',       /* Squote */
    '\"',       /* Sstring */
    '$',        /* Smath */
    '\\',       /* Sescape */
    '/',        /* Scharquote */
    '<',        /* Scomment */
    '>',        /* Sendcomment */
    '@'         /* Sinherit */
  };
216 \f
217 /* Look up the value for CHARACTER in syntax table TABLE's parent
218 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
219 for CHARACTER. It's actually used only when not compiled with GCC. */
220
221 Lisp_Object
222 syntax_parent_lookup (table, character)
223 Lisp_Object table;
224 int character;
225 {
226 Lisp_Object value;
227
228 while (1)
229 {
230 table = XCHAR_TABLE (table)->parent;
231 if (NILP (table))
232 return Qnil;
233
234 value = XCHAR_TABLE (table)->contents[character];
235 if (!NILP (value))
236 return value;
237 }
238 }
239
240 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
241 "Return the syntax code of CHARACTER, described by a character.\n\
242 For example, if CHARACTER is a word constituent,\n\
243 the character `w' is returned.\n\
244 The characters that correspond to various syntax codes\n\
245 are listed in the documentation of `modify-syntax-entry'.")
246 (character)
247 Lisp_Object character;
248 {
249 int char_int;
250 CHECK_NUMBER (character, 0);
251 char_int = XINT (character);
252 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
253 }
254
255 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
256 "Return the matching parenthesis of CHARACTER, or nil if none.")
257 (character)
258 Lisp_Object character;
259 {
260 int char_int, code;
261 CHECK_NUMBER (character, 0);
262 char_int = XINT (character);
263 code = SYNTAX (char_int);
264 if (code == Sopen || code == Sclose)
265 return make_number (SYNTAX_MATCH (char_int));
266 return Qnil;
267 }
268
269 /* This comment supplies the doc string for modify-syntax-entry,
270 for make-docfile to see. We cannot put this in the real DEFUN
271 due to limits in the Unix cpp.
272
273 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
274 "Set syntax for character CHAR according to string S.\n\
275 The syntax is changed only for table TABLE, which defaults to\n\
276 the current buffer's syntax table.\n\
277 The first character of S should be one of the following:\n\
278 Space or - whitespace syntax. w word constituent.\n\
279 _ symbol constituent. . punctuation.\n\
280 ( open-parenthesis. ) close-parenthesis.\n\
281 \" string quote. \\ escape.\n\
282 $ paired delimiter. ' expression quote or prefix operator.\n\
283 < comment starter. > comment ender.\n\
284 / character-quote. @ inherit from `standard-syntax-table'.\n\
285 \n\
286 Only single-character comment start and end sequences are represented thus.\n\
287 Two-character sequences are represented as described below.\n\
288 The second character of S is the matching parenthesis,\n\
289 used only if the first character is `(' or `)'.\n\
290 Any additional characters are flags.\n\
291 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
292 1 means CHAR is the start of a two-char comment start sequence.\n\
293 2 means CHAR is the second character of such a sequence.\n\
294 3 means CHAR is the start of a two-char comment end sequence.\n\
295 4 means CHAR is the second character of such a sequence.\n\
296 \n\
297 There can be up to two orthogonal comment sequences. This is to support\n\
298 language modes such as C++. By default, all comment sequences are of style\n\
299 a, but you can set the comment sequence style to b (on the second character\n\
300 of a comment-start, or the first character of a comment-end sequence) using\n\
301 this flag:\n\
302 b means CHAR is part of comment sequence b.\n\
303 \n\
304 p means CHAR is a prefix character for `backward-prefix-chars';\n\
305 such characters are treated as whitespace when they occur\n\
306 between expressions.")
307 (char, s, table)
308 */
309
310 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
311 /* I really don't know why this is interactive
312 help-form should at least be made useful whilst reading the second arg
313 */
314 "cSet syntax for character: \nsSet syntax for %s to: ",
315 0 /* See immediately above */)
316 (c, newentry, syntax_table)
317 Lisp_Object c, newentry, syntax_table;
318 {
319 register unsigned char *p;
320 register enum syntaxcode code;
321 int val;
322 Lisp_Object match;
323
324 CHECK_NUMBER (c, 0);
325 CHECK_STRING (newentry, 1);
326
327 if (NILP (syntax_table))
328 syntax_table = current_buffer->syntax_table;
329 else
330 check_syntax_table (syntax_table);
331
332 p = XSTRING (newentry)->data;
333 code = (enum syntaxcode) syntax_spec_code[*p++];
334 if (((int) code & 0377) == 0377)
335 error ("invalid syntax description letter: %c", c);
336
337 if (code == Sinherit)
338 {
339 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
340 return Qnil;
341 }
342
343 if (*p)
344 {
345 XSETINT (match, *p++);
346 if (XFASTINT (match) == ' ')
347 match = Qnil;
348 }
349 else
350 match = Qnil;
351
352 val = (int) code;
353 while (*p)
354 switch (*p++)
355 {
356 case '1':
357 val |= 1 << 16;
358 break;
359
360 case '2':
361 val |= 1 << 17;
362 break;
363
364 case '3':
365 val |= 1 << 18;
366 break;
367
368 case '4':
369 val |= 1 << 19;
370 break;
371
372 case 'p':
373 val |= 1 << 20;
374 break;
375
376 case 'b':
377 val |= 1 << 21;
378 break;
379 }
380
381 SET_RAW_SYNTAX_ENTRY (syntax_table, c,
382 Fcons (make_number (val), match));
383
384 return Qnil;
385 }
386 \f
387 /* Dump syntax table to buffer in human-readable format */
388
389 static void
390 describe_syntax (value)
391 Lisp_Object value;
392 {
393 register enum syntaxcode code;
394 char desc, match, start1, start2, end1, end2, prefix, comstyle;
395 char str[2];
396 Lisp_Object first, match_lisp;
397
398 Findent_to (make_number (16), make_number (1));
399
400 if (NILP (value))
401 {
402 insert_string ("inherit");
403 return;
404 }
405
406 if (!CONSP (value))
407 {
408 insert_string ("invalid");
409 return;
410 }
411
412 first = XCONS (value)->car;
413 match_lisp = XCONS (value)->cdr;
414
415 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
416 {
417 insert_string ("invalid");
418 return;
419 }
420
421 code = (enum syntaxcode) (first & 0377);
422 start1 = (XINT (first) >> 16) & 1;
423 start2 = (XINT (first) >> 17) & 1;
424 end1 = (XINT (first) >> 18) & 1;
425 end2 = (XINT (first) >> 19) & 1;
426 prefix = (XINT (first) >> 20) & 1;
427 comstyle = (XINT (first) >> 21) & 1;
428
429 if ((int) code < 0 || (int) code >= (int) Smax)
430 {
431 insert_string ("invalid");
432 return;
433 }
434 desc = syntax_code_spec[(int) code];
435
436 str[0] = desc, str[1] = 0;
437 insert (str, 1);
438
439 str[0] = !NILP (match_lisp) ? XINT (match_lisp) : ' ';
440 insert (str, 1);
441
442 if (start1)
443 insert ("1", 1);
444 if (start2)
445 insert ("2", 1);
446
447 if (end1)
448 insert ("3", 1);
449 if (end2)
450 insert ("4", 1);
451
452 if (prefix)
453 insert ("p", 1);
454 if (comstyle)
455 insert ("b", 1);
456
457 insert_string ("\twhich means: ");
458
459 switch (SWITCH_ENUM_CAST (code))
460 {
461 case Swhitespace:
462 insert_string ("whitespace"); break;
463 case Spunct:
464 insert_string ("punctuation"); break;
465 case Sword:
466 insert_string ("word"); break;
467 case Ssymbol:
468 insert_string ("symbol"); break;
469 case Sopen:
470 insert_string ("open"); break;
471 case Sclose:
472 insert_string ("close"); break;
473 case Squote:
474 insert_string ("quote"); break;
475 case Sstring:
476 insert_string ("string"); break;
477 case Smath:
478 insert_string ("math"); break;
479 case Sescape:
480 insert_string ("escape"); break;
481 case Scharquote:
482 insert_string ("charquote"); break;
483 case Scomment:
484 insert_string ("comment"); break;
485 case Sendcomment:
486 insert_string ("endcomment"); break;
487 default:
488 insert_string ("invalid");
489 return;
490 }
491
492 if (!NILP (match_lisp))
493 {
494 insert_string (", matches ");
495 insert_char (XINT (match_lisp));
496 }
497
498 if (start1)
499 insert_string (",\n\t is the first character of a comment-start sequence");
500 if (start2)
501 insert_string (",\n\t is the second character of a comment-start sequence");
502
503 if (end1)
504 insert_string (",\n\t is the first character of a comment-end sequence");
505 if (end2)
506 insert_string (",\n\t is the second character of a comment-end sequence");
507 if (comstyle)
508 insert_string (" (comment style b)");
509
510 if (prefix)
511 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
512
513 insert_string ("\n");
514 }
515
516 static Lisp_Object
517 describe_syntax_1 (vector)
518 Lisp_Object vector;
519 {
520 struct buffer *old = current_buffer;
521 set_buffer_internal (XBUFFER (Vstandard_output));
522 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
523 call0 (intern ("help-mode"));
524 set_buffer_internal (old);
525 return Qnil;
526 }
527
528 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
529 "Describe the syntax specifications in the syntax table.\n\
530 The descriptions are inserted in a buffer, which is then displayed.")
531 ()
532 {
533 internal_with_output_to_temp_buffer
534 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
535
536 return Qnil;
537 }
538 \f
539 /* Return the position across COUNT words from FROM.
540 If that many words cannot be found before the end of the buffer, return 0.
541 COUNT negative means scan backward and stop at word beginning. */
542
543 scan_words (from, count)
544 register int from, count;
545 {
546 register int beg = BEGV;
547 register int end = ZV;
548 register int code;
549 int charcode;
550
551 immediate_quit = 1;
552 QUIT;
553
554 while (count > 0)
555 {
556 while (1)
557 {
558 if (from == end)
559 {
560 immediate_quit = 0;
561 return 0;
562 }
563 charcode = FETCH_CHAR (from);
564 code = SYNTAX (charcode);
565 if (words_include_escapes
566 && (code == Sescape || code == Scharquote))
567 break;
568 if (code == Sword)
569 break;
570 from++;
571 }
572 while (1)
573 {
574 if (from == end) break;
575 charcode = FETCH_CHAR (from);
576 code = SYNTAX (charcode);
577 if (!(words_include_escapes
578 && (code == Sescape || code == Scharquote)))
579 if (code != Sword)
580 break;
581 from++;
582 }
583 count--;
584 }
585 while (count < 0)
586 {
587 while (1)
588 {
589 if (from == beg)
590 {
591 immediate_quit = 0;
592 return 0;
593 }
594 charcode = FETCH_CHAR (from - 1);
595 code = SYNTAX (charcode);
596 if (words_include_escapes
597 && (code == Sescape || code == Scharquote))
598 break;
599 if (code == Sword)
600 break;
601 from--;
602 }
603 while (1)
604 {
605 if (from == beg) break;
606 charcode = FETCH_CHAR (from - 1);
607 code = SYNTAX (charcode);
608 if (!(words_include_escapes
609 && (code == Sescape || code == Scharquote)))
610 if (code != Sword)
611 break;
612 from--;
613 }
614 count++;
615 }
616
617 immediate_quit = 0;
618
619 return from;
620 }
621
622 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
623 "Move point forward ARG words (backward if ARG is negative).\n\
624 Normally returns t.\n\
625 If an edge of the buffer is reached, point is left there\n\
626 and nil is returned.")
627 (count)
628 Lisp_Object count;
629 {
630 int val;
631 CHECK_NUMBER (count, 0);
632
633 if (!(val = scan_words (point, XINT (count))))
634 {
635 SET_PT (XINT (count) > 0 ? ZV : BEGV);
636 return Qnil;
637 }
638 SET_PT (val);
639 return Qt;
640 }
641 \f
642 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
643 "Move forward across up to N comments. If N is negative, move backward.\n\
644 Stop scanning if we find something other than a comment or whitespace.\n\
645 Set point to where scanning stops.\n\
646 If N comments are found as expected, with nothing except whitespace\n\
647 between them, return t; otherwise return nil.")
648 (count)
649 Lisp_Object count;
650 {
651 register int from;
652 register int stop;
653 register int c, c1;
654 register enum syntaxcode code;
655 int comstyle = 0; /* style of comment encountered */
656 int found;
657 int count1;
658
659 CHECK_NUMBER (count, 0);
660 count1 = XINT (count);
661
662 immediate_quit = 1;
663 QUIT;
664
665 from = PT;
666
667 while (count1 > 0)
668 {
669 stop = ZV;
670 do
671 {
672 if (from == stop)
673 {
674 SET_PT (from);
675 immediate_quit = 0;
676 return Qnil;
677 }
678 c = FETCH_CHAR (from);
679 code = SYNTAX (c);
680 from++;
681 comstyle = 0;
682 if (from < stop && SYNTAX_COMSTART_FIRST (c)
683 && (c1 = FETCH_CHAR (from),
684 SYNTAX_COMSTART_SECOND (c1)))
685 {
686 /* We have encountered a comment start sequence and we
687 are ignoring all text inside comments. We must record
688 the comment style this sequence begins so that later,
689 only a comment end of the same style actually ends
690 the comment section. */
691 code = Scomment;
692 comstyle = SYNTAX_COMMENT_STYLE (c1);
693 from++;
694 }
695 }
696 while (code == Swhitespace || code == Sendcomment);
697 if (code != Scomment)
698 {
699 immediate_quit = 0;
700 SET_PT (from - 1);
701 return Qnil;
702 }
703 /* We're at the start of a comment. */
704 while (1)
705 {
706 if (from == stop)
707 {
708 immediate_quit = 0;
709 SET_PT (from);
710 return Qnil;
711 }
712 c = FETCH_CHAR (from);
713 from++;
714 if (SYNTAX (c) == Sendcomment
715 && SYNTAX_COMMENT_STYLE (c) == comstyle)
716 /* we have encountered a comment end of the same style
717 as the comment sequence which began this comment
718 section */
719 break;
720 if (from < stop && SYNTAX_COMEND_FIRST (c)
721 && (c1 = FETCH_CHAR (from),
722 SYNTAX_COMEND_SECOND (c1))
723 && SYNTAX_COMMENT_STYLE (c) == comstyle)
724 /* we have encountered a comment end of the same style
725 as the comment sequence which began this comment
726 section */
727 { from++; break; }
728 }
729 /* We have skipped one comment. */
730 count1--;
731 }
732
733 while (count1 < 0)
734 {
735 stop = BEGV;
736 while (from > stop)
737 {
738 int quoted;
739
740 from--;
741 quoted = char_quoted (from);
742 if (quoted)
743 from--;
744 c = FETCH_CHAR (from);
745 code = SYNTAX (c);
746 comstyle = 0;
747 if (code == Sendcomment)
748 comstyle = SYNTAX_COMMENT_STYLE (c);
749 if (from > stop && SYNTAX_COMEND_SECOND (c)
750 && (c1 = FETCH_CHAR (from - 1),
751 SYNTAX_COMEND_FIRST (c1))
752 && !char_quoted (from - 1))
753 {
754 /* We must record the comment style encountered so that
755 later, we can match only the proper comment begin
756 sequence of the same style. */
757 code = Sendcomment;
758 comstyle = SYNTAX_COMMENT_STYLE (c1);
759 from--;
760 }
761
762 if (code == Sendcomment && !quoted)
763 {
764 #if 0
765 if (code != SYNTAX (c))
766 /* For a two-char comment ender, we can assume
767 it does end a comment. So scan back in a simple way. */
768 {
769 if (from != stop) from--;
770 while (1)
771 {
772 if ((c = FETCH_CHAR (from),
773 SYNTAX (c) == Scomment)
774 && SYNTAX_COMMENT_STYLE (c) == comstyle)
775 break;
776 if (from == stop)
777 {
778 immediate_quit = 0;
779 SET_PT (from);
780 return Qnil;
781 }
782 from--;
783 if (SYNTAX_COMSTART_SECOND (c)
784 && (c1 = FETCH_CHAR (from),
785 SYNTAX_COMSTART_FIRST (c1))
786 && SYNTAX_COMMENT_STYLE (c) == comstyle
787 && !char_quoted (from))
788 break;
789 }
790 break;
791 }
792 #endif /* 0 */
793
794 /* Look back, counting the parity of string-quotes,
795 and recording the comment-starters seen.
796 When we reach a safe place, assume that's not in a string;
797 then step the main scan to the earliest comment-starter seen
798 an even number of string quotes away from the safe place.
799
800 OFROM[I] is position of the earliest comment-starter seen
801 which is I+2X quotes from the comment-end.
802 PARITY is current parity of quotes from the comment end. */
803 {
804 int parity = 0;
805 char my_stringend = 0;
806 int string_lossage = 0;
807 int comment_end = from;
808 int comstart_pos = 0;
809 int comstart_parity = 0;
810 int scanstart = from - 1;
811
812 /* At beginning of range to scan, we're outside of strings;
813 that determines quote parity to the comment-end. */
814 while (from != stop)
815 {
816 /* Move back and examine a character. */
817 from--;
818
819 c = FETCH_CHAR (from);
820 code = SYNTAX (c);
821
822 /* If this char is the second of a 2-char comment sequence,
823 back up and give the pair the appropriate syntax. */
824 if (from > stop && SYNTAX_COMEND_SECOND (c)
825 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
826 {
827 code = Sendcomment;
828 from--;
829 c = FETCH_CHAR (from);
830 }
831
832 /* If this char starts a 2-char comment start sequence,
833 treat it like a 1-char comment starter. */
834 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
835 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
836 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
837 code = Scomment;
838
839 /* Ignore escaped characters. */
840 if (char_quoted (from))
841 continue;
842
843 /* Track parity of quotes. */
844 if (code == Sstring)
845 {
846 parity ^= 1;
847 if (my_stringend == 0)
848 my_stringend = c;
849 /* If we have two kinds of string delimiters.
850 There's no way to grok this scanning backwards. */
851 else if (my_stringend != c)
852 string_lossage = 1;
853 }
854
855 /* Record comment-starters according to that
856 quote-parity to the comment-end. */
857 if (code == Scomment)
858 {
859 comstart_parity = parity;
860 comstart_pos = from;
861 }
862
863 /* If we find another earlier comment-ender,
864 any comment-starts earlier than that don't count
865 (because they go with the earlier comment-ender). */
866 if (code == Sendcomment
867 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
868 break;
869
870 /* Assume a defun-start point is outside of strings. */
871 if (code == Sopen
872 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
873 break;
874 }
875
876 if (comstart_pos == 0)
877 from = comment_end;
878 /* If the earliest comment starter
879 is followed by uniform paired string quotes or none,
880 we know it can't be inside a string
881 since if it were then the comment ender would be inside one.
882 So it does start a comment. Skip back to it. */
883 else if (comstart_parity == 0 && !string_lossage)
884 from = comstart_pos;
885 else
886 {
887 /* We had two kinds of string delimiters mixed up
888 together. Decode this going forwards.
889 Scan fwd from the previous comment ender
890 to the one in question; this records where we
891 last passed a comment starter. */
892 struct lisp_parse_state state;
893 scan_sexps_forward (&state, find_defun_start (comment_end),
894 comment_end - 1, -10000, 0, Qnil, 0);
895 if (state.incomment)
896 from = state.comstart;
897 else
898 /* We can't grok this as a comment; scan it normally. */
899 from = comment_end;
900 }
901 }
902 /* We have skipped one comment. */
903 break;
904 }
905 else if ((code != Swhitespace && code != Scomment) || quoted)
906 {
907 immediate_quit = 0;
908 SET_PT (from + 1);
909 return Qnil;
910 }
911 }
912
913 count1++;
914 }
915
916 SET_PT (from);
917 immediate_quit = 0;
918 return Qt;
919 }
920 \f
/* Nonzero means scan_lists recognizes comments and skips over them
   instead of parsing their contents.  */
int parse_sexp_ignore_comments;
922
923 Lisp_Object
924 scan_lists (from, count, depth, sexpflag)
925 register int from;
926 int count, depth, sexpflag;
927 {
928 Lisp_Object val;
929 register int stop;
930 register int c;
931 unsigned char stringterm;
932 int quoted;
933 int mathexit = 0;
934 register enum syntaxcode code;
935 int min_depth = depth; /* Err out if depth gets less than this. */
936 int comstyle = 0; /* style of comment encountered */
937
938 if (depth > 0) min_depth = 0;
939
940 immediate_quit = 1;
941 QUIT;
942
943 while (count > 0)
944 {
945 stop = ZV;
946 while (from < stop)
947 {
948 c = FETCH_CHAR (from);
949 code = SYNTAX (c);
950 from++;
951 if (from < stop && SYNTAX_COMSTART_FIRST (c)
952 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
953 && parse_sexp_ignore_comments)
954 {
955 /* we have encountered a comment start sequence and we
956 are ignoring all text inside comments. we must record
957 the comment style this sequence begins so that later,
958 only a comment end of the same style actually ends
959 the comment section */
960 code = Scomment;
961 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
962 from++;
963 }
964
965 if (SYNTAX_PREFIX (c))
966 continue;
967
968 switch (SWITCH_ENUM_CAST (code))
969 {
970 case Sescape:
971 case Scharquote:
972 if (from == stop) goto lose;
973 from++;
974 /* treat following character as a word constituent */
975 case Sword:
976 case Ssymbol:
977 if (depth || !sexpflag) break;
978 /* This word counts as a sexp; return at end of it. */
979 while (from < stop)
980 {
981 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
982 {
983 case Scharquote:
984 case Sescape:
985 from++;
986 if (from == stop) goto lose;
987 break;
988 case Sword:
989 case Ssymbol:
990 case Squote:
991 break;
992 default:
993 goto done;
994 }
995 from++;
996 }
997 goto done;
998
999 case Scomment:
1000 if (!parse_sexp_ignore_comments) break;
1001 while (1)
1002 {
1003 if (from == stop)
1004 {
1005 if (depth == 0)
1006 goto done;
1007 goto lose;
1008 }
1009 c = FETCH_CHAR (from);
1010 if (SYNTAX (c) == Sendcomment
1011 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1012 /* we have encountered a comment end of the same style
1013 as the comment sequence which began this comment
1014 section */
1015 break;
1016 from++;
1017 if (from < stop && SYNTAX_COMEND_FIRST (c)
1018 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1019 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1020 /* we have encountered a comment end of the same style
1021 as the comment sequence which began this comment
1022 section */
1023 { from++; break; }
1024 }
1025 break;
1026
1027 case Smath:
1028 if (!sexpflag)
1029 break;
1030 if (from != stop && c == FETCH_CHAR (from))
1031 from++;
1032 if (mathexit)
1033 {
1034 mathexit = 0;
1035 goto close1;
1036 }
1037 mathexit = 1;
1038
1039 case Sopen:
1040 if (!++depth) goto done;
1041 break;
1042
1043 case Sclose:
1044 close1:
1045 if (!--depth) goto done;
1046 if (depth < min_depth)
1047 error ("Containing expression ends prematurely");
1048 break;
1049
1050 case Sstring:
1051 stringterm = FETCH_CHAR (from - 1);
1052 while (1)
1053 {
1054 if (from >= stop) goto lose;
1055 if (FETCH_CHAR (from) == stringterm) break;
1056 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1057 {
1058 case Scharquote:
1059 case Sescape:
1060 from++;
1061 }
1062 from++;
1063 }
1064 from++;
1065 if (!depth && sexpflag) goto done;
1066 break;
1067 }
1068 }
1069
1070 /* Reached end of buffer. Error if within object, return nil if between */
1071 if (depth) goto lose;
1072
1073 immediate_quit = 0;
1074 return Qnil;
1075
1076 /* End of object reached */
1077 done:
1078 count--;
1079 }
1080
1081 while (count < 0)
1082 {
1083 stop = BEGV;
1084 while (from > stop)
1085 {
1086 from--;
1087 if (quoted = char_quoted (from))
1088 from--;
1089 c = FETCH_CHAR (from);
1090 code = SYNTAX (c);
1091 comstyle = 0;
1092 if (code == Sendcomment)
1093 comstyle = SYNTAX_COMMENT_STYLE (c);
1094 if (from > stop && SYNTAX_COMEND_SECOND (c)
1095 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1096 && !char_quoted (from - 1)
1097 && parse_sexp_ignore_comments)
1098 {
1099 /* we must record the comment style encountered so that
1100 later, we can match only the proper comment begin
1101 sequence of the same style */
1102 code = Sendcomment;
1103 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1104 from--;
1105 }
1106
1107 if (SYNTAX_PREFIX (c))
1108 continue;
1109
1110 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1111 {
1112 case Sword:
1113 case Ssymbol:
1114 if (depth || !sexpflag) break;
1115 /* This word counts as a sexp; count object finished after passing it. */
1116 while (from > stop)
1117 {
1118 quoted = char_quoted (from - 1);
1119 if (quoted)
1120 from--;
1121 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1122 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1123 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
1124 goto done2;
1125 from--;
1126 }
1127 goto done2;
1128
1129 case Smath:
1130 if (!sexpflag)
1131 break;
1132 if (from != stop && c == FETCH_CHAR (from - 1))
1133 from--;
1134 if (mathexit)
1135 {
1136 mathexit = 0;
1137 goto open2;
1138 }
1139 mathexit = 1;
1140
1141 case Sclose:
1142 if (!++depth) goto done2;
1143 break;
1144
1145 case Sopen:
1146 open2:
1147 if (!--depth) goto done2;
1148 if (depth < min_depth)
1149 error ("Containing expression ends prematurely");
1150 break;
1151
1152 case Sendcomment:
1153 if (!parse_sexp_ignore_comments)
1154 break;
1155 #if 0
1156 if (code != SYNTAX (c))
1157 /* For a two-char comment ender, we can assume
1158 it does end a comment. So scan back in a simple way. */
1159 {
1160 if (from != stop) from--;
1161 while (1)
1162 {
1163 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1164 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1165 break;
1166 if (from == stop)
1167 {
1168 if (depth == 0)
1169 goto done2;
1170 goto lose;
1171 }
1172 from--;
1173 if (SYNTAX_COMSTART_SECOND (c)
1174 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1175 && SYNTAX_COMMENT_STYLE (c) == comstyle
1176 && !char_quoted (from))
1177 break;
1178 }
1179 break;
1180 }
1181 #endif /* 0 */
1182
1183 /* Look back, counting the parity of string-quotes,
1184 and recording the comment-starters seen.
1185 When we reach a safe place, assume that's not in a string;
1186 then step the main scan to the earliest comment-starter seen
1187 an even number of string quotes away from the safe place.
1188
1189 OFROM[I] is position of the earliest comment-starter seen
1190 which is I+2X quotes from the comment-end.
1191 PARITY is current parity of quotes from the comment end. */
1192 {
1193 int parity = 0;
1194 char my_stringend = 0;
1195 int string_lossage = 0;
1196 int comment_end = from;
1197 int comstart_pos = 0;
1198 int comstart_parity = 0;
1199 int scanstart = from - 1;
1200
1201 /* At beginning of range to scan, we're outside of strings;
1202 that determines quote parity to the comment-end. */
1203 while (from != stop)
1204 {
1205 /* Move back and examine a character. */
1206 from--;
1207
1208 c = FETCH_CHAR (from);
1209 code = SYNTAX (c);
1210
1211 /* If this char is the second of a 2-char comment sequence,
1212 back up and give the pair the appropriate syntax. */
1213 if (from > stop && SYNTAX_COMEND_SECOND (c)
1214 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
1215 {
1216 code = Sendcomment;
1217 from--;
1218 c = FETCH_CHAR (from);
1219 }
1220
1221 /* If this char starts a 2-char comment start sequence,
1222 treat it like a 1-char comment starter. */
1223 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1224 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
1225 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
1226 code = Scomment;
1227
1228 /* Ignore escaped characters. */
1229 if (char_quoted (from))
1230 continue;
1231
1232 /* Track parity of quotes. */
1233 if (code == Sstring)
1234 {
1235 parity ^= 1;
1236 if (my_stringend == 0)
1237 my_stringend = c;
1238 /* If we have two kinds of string delimiters.
1239 There's no way to grok this scanning backwards. */
1240 else if (my_stringend != c)
1241 string_lossage = 1;
1242 }
1243
1244 /* Record comment-starters according to that
1245 quote-parity to the comment-end. */
1246 if (code == Scomment)
1247 {
1248 comstart_parity = parity;
1249 comstart_pos = from;
1250 }
1251
1252 /* If we find another earlier comment-ender,
1253 any comment-starts earlier than that don't count
1254 (because they go with the earlier comment-ender). */
1255 if (code == Sendcomment
1256 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1257 break;
1258
1259 /* Assume a defun-start point is outside of strings. */
1260 if (code == Sopen
1261 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1262 break;
1263 }
1264
1265 if (comstart_pos == 0)
1266 from = comment_end;
1267 /* If the earliest comment starter
1268 is followed by uniform paired string quotes or none,
1269 we know it can't be inside a string
1270 since if it were then the comment ender would be inside one.
1271 So it does start a comment. Skip back to it. */
1272 else if (comstart_parity == 0 && !string_lossage)
1273 from = comstart_pos;
1274 else
1275 {
1276 /* We had two kinds of string delimiters mixed up
1277 together. Decode this going forwards.
1278 Scan fwd from the previous comment ender
1279 to the one in question; this records where we
1280 last passed a comment starter. */
1281 struct lisp_parse_state state;
1282 scan_sexps_forward (&state, find_defun_start (comment_end),
1283 comment_end - 1, -10000, 0, Qnil, 0);
1284 if (state.incomment)
1285 from = state.comstart;
1286 else
1287 /* We can't grok this as a comment; scan it normally. */
1288 from = comment_end;
1289 }
1290 }
1291 break;
1292
1293 case Sstring:
1294 stringterm = FETCH_CHAR (from);
1295 while (1)
1296 {
1297 if (from == stop) goto lose;
1298 if (!char_quoted (from - 1)
1299 && stringterm == FETCH_CHAR (from - 1))
1300 break;
1301 from--;
1302 }
1303 from--;
1304 if (!depth && sexpflag) goto done2;
1305 break;
1306 }
1307 }
1308
1309 /* Reached start of buffer. Error if within object, return nil if between */
1310 if (depth) goto lose;
1311
1312 immediate_quit = 0;
1313 return Qnil;
1314
1315 done2:
1316 count++;
1317 }
1318
1319
1320 immediate_quit = 0;
1321 XSETFASTINT (val, from);
1322 return val;
1323
1324 lose:
1325 error ("Unbalanced parentheses");
1326 /* NOTREACHED */
1327 }
1328
1329 static int
1330 char_quoted (pos)
1331 register int pos;
1332 {
1333 register enum syntaxcode code;
1334 register int beg = BEGV;
1335 register int quoted = 0;
1336
1337 while (pos > beg
1338 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1339 || code == Sescape))
1340 pos--, quoted = !quoted;
1341 return quoted;
1342 }
1343
1344 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1345 "Scan from character number FROM by COUNT lists.\n\
1346 Returns the character number of the position thus found.\n\
1347 \n\
1348 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1349 only places where the depth in parentheses becomes zero\n\
1350 are candidates for stopping; COUNT such places are counted.\n\
1351 Thus, a positive value for DEPTH means go out levels.\n\
1352 \n\
1353 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1354 \n\
1355 If the beginning or end of (the accessible part of) the buffer is reached\n\
1356 and the depth is wrong, an error is signaled.\n\
1357 If the depth is right but the count is not used up, nil is returned.")
1358 (from, count, depth)
1359 Lisp_Object from, count, depth;
1360 {
1361 CHECK_NUMBER (from, 0);
1362 CHECK_NUMBER (count, 1);
1363 CHECK_NUMBER (depth, 2);
1364
1365 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1366 }
1367
1368 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1369 "Scan from character number FROM by COUNT balanced expressions.\n\
1370 If COUNT is negative, scan backwards.\n\
1371 Returns the character number of the position thus found.\n\
1372 \n\
1373 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1374 \n\
1375 If the beginning or end of (the accessible part of) the buffer is reached\n\
1376 in the middle of a parenthetical grouping, an error is signaled.\n\
1377 If the beginning or end is reached between groupings\n\
1378 but before count is used up, nil is returned.")
1379 (from, count)
1380 Lisp_Object from, count;
1381 {
1382 CHECK_NUMBER (from, 0);
1383 CHECK_NUMBER (count, 1);
1384
1385 return scan_lists (XINT (from), XINT (count), 0, 1);
1386 }
1387
1388 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1389 0, 0, 0,
1390 "Move point backward over any number of chars with prefix syntax.\n\
1391 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1392 ()
1393 {
1394 int beg = BEGV;
1395 int pos = point;
1396
1397 while (pos > beg && !char_quoted (pos - 1)
1398 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1399 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1400 pos--;
1401
1402 SET_PT (pos);
1403
1404 return Qnil;
1405 }
1406 \f
1407 /* Parse forward from FROM to END,
1408 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1409 and return a description of the state of the parse at END.
1410 If STOPBEFORE is nonzero, stop at the start of an atom.
1411 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1412
1413 static void
1414 scan_sexps_forward (stateptr, from, end, targetdepth,
1415 stopbefore, oldstate, commentstop)
1416 struct lisp_parse_state *stateptr;
1417 register int from;
1418 int end, targetdepth, stopbefore;
1419 Lisp_Object oldstate;
1420 int commentstop;
1421 {
1422 struct lisp_parse_state state;
1423
1424 register enum syntaxcode code;
1425 struct level { int last, prev; };
1426 struct level levelstart[100];
1427 register struct level *curlevel = levelstart;
1428 struct level *endlevel = levelstart + 100;
1429 char prev;
1430 register int depth; /* Paren depth of current scanning location.
1431 level - levelstart equals this except
1432 when the depth becomes negative. */
1433 int mindepth; /* Lowest DEPTH value seen. */
1434 int start_quoted = 0; /* Nonzero means starting after a char quote */
1435 Lisp_Object tem;
1436
1437 immediate_quit = 1;
1438 QUIT;
1439
1440 if (NILP (oldstate))
1441 {
1442 depth = 0;
1443 state.instring = -1;
1444 state.incomment = 0;
1445 state.comstyle = 0; /* comment style a by default */
1446 }
1447 else
1448 {
1449 tem = Fcar (oldstate);
1450 if (!NILP (tem))
1451 depth = XINT (tem);
1452 else
1453 depth = 0;
1454
1455 oldstate = Fcdr (oldstate);
1456 oldstate = Fcdr (oldstate);
1457 oldstate = Fcdr (oldstate);
1458 tem = Fcar (oldstate);
1459 state.instring = !NILP (tem) ? XINT (tem) : -1;
1460
1461 oldstate = Fcdr (oldstate);
1462 tem = Fcar (oldstate);
1463 state.incomment = !NILP (tem);
1464
1465 oldstate = Fcdr (oldstate);
1466 tem = Fcar (oldstate);
1467 start_quoted = !NILP (tem);
1468
1469 /* if the eight element of the list is nil, we are in comment
1470 style a. if it is non-nil, we are in comment style b */
1471 oldstate = Fcdr (oldstate);
1472 oldstate = Fcdr (oldstate);
1473 tem = Fcar (oldstate);
1474 state.comstyle = !NILP (tem);
1475 }
1476 state.quoted = 0;
1477 mindepth = depth;
1478
1479 curlevel->prev = -1;
1480 curlevel->last = -1;
1481
1482 /* Enter the loop at a place appropriate for initial state. */
1483
1484 if (state.incomment) goto startincomment;
1485 if (state.instring >= 0)
1486 {
1487 if (start_quoted) goto startquotedinstring;
1488 goto startinstring;
1489 }
1490 if (start_quoted) goto startquoted;
1491
1492 while (from < end)
1493 {
1494 code = SYNTAX (FETCH_CHAR (from));
1495 from++;
1496 if (code == Scomment)
1497 state.comstart = from-1;
1498
1499 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1500 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
1501 {
1502 /* Record the comment style we have entered so that only
1503 the comment-end sequence of the same style actually
1504 terminates the comment section. */
1505 code = Scomment;
1506 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1507 state.comstart = from-1;
1508 from++;
1509 }
1510
1511 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1512 continue;
1513 switch (SWITCH_ENUM_CAST (code))
1514 {
1515 case Sescape:
1516 case Scharquote:
1517 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1518 curlevel->last = from - 1;
1519 startquoted:
1520 if (from == end) goto endquoted;
1521 from++;
1522 goto symstarted;
1523 /* treat following character as a word constituent */
1524 case Sword:
1525 case Ssymbol:
1526 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1527 curlevel->last = from - 1;
1528 symstarted:
1529 while (from < end)
1530 {
1531 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1532 {
1533 case Scharquote:
1534 case Sescape:
1535 from++;
1536 if (from == end) goto endquoted;
1537 break;
1538 case Sword:
1539 case Ssymbol:
1540 case Squote:
1541 break;
1542 default:
1543 goto symdone;
1544 }
1545 from++;
1546 }
1547 symdone:
1548 curlevel->prev = curlevel->last;
1549 break;
1550
1551 startincomment:
1552 if (commentstop)
1553 goto done;
1554 if (from != BEGV)
1555 {
1556 /* Enter the loop in the middle so that we find
1557 a 2-char comment ender if we start in the middle of it. */
1558 prev = FETCH_CHAR (from - 1);
1559 goto startincomment_1;
1560 }
1561 /* At beginning of buffer, enter the loop the ordinary way. */
1562
1563 case Scomment:
1564 state.incomment = 1;
1565 if (commentstop)
1566 goto done;
1567 while (1)
1568 {
1569 if (from == end) goto done;
1570 prev = FETCH_CHAR (from);
1571 if (SYNTAX (prev) == Sendcomment
1572 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1573 /* Only terminate the comment section if the endcomment
1574 of the same style as the start sequence has been
1575 encountered. */
1576 break;
1577 from++;
1578 startincomment_1:
1579 if (from < end && SYNTAX_COMEND_FIRST (prev)
1580 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1581 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1582 /* Only terminate the comment section if the end-comment
1583 sequence of the same style as the start sequence has
1584 been encountered. */
1585 { from++; break; }
1586 }
1587 state.incomment = 0;
1588 state.comstyle = 0; /* reset the comment style */
1589 break;
1590
1591 case Sopen:
1592 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1593 depth++;
1594 /* curlevel++->last ran into compiler bug on Apollo */
1595 curlevel->last = from - 1;
1596 if (++curlevel == endlevel)
1597 error ("Nesting too deep for parser");
1598 curlevel->prev = -1;
1599 curlevel->last = -1;
1600 if (targetdepth == depth) goto done;
1601 break;
1602
1603 case Sclose:
1604 depth--;
1605 if (depth < mindepth)
1606 mindepth = depth;
1607 if (curlevel != levelstart)
1608 curlevel--;
1609 curlevel->prev = curlevel->last;
1610 if (targetdepth == depth) goto done;
1611 break;
1612
1613 case Sstring:
1614 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1615 curlevel->last = from - 1;
1616 state.instring = FETCH_CHAR (from - 1);
1617 startinstring:
1618 while (1)
1619 {
1620 if (from >= end) goto done;
1621 if (FETCH_CHAR (from) == state.instring) break;
1622 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1623 {
1624 case Scharquote:
1625 case Sescape:
1626 from++;
1627 startquotedinstring:
1628 if (from >= end) goto endquoted;
1629 }
1630 from++;
1631 }
1632 state.instring = -1;
1633 curlevel->prev = curlevel->last;
1634 from++;
1635 break;
1636
1637 case Smath:
1638 break;
1639 }
1640 }
1641 goto done;
1642
1643 stop: /* Here if stopping before start of sexp. */
1644 from--; /* We have just fetched the char that starts it; */
1645 goto done; /* but return the position before it. */
1646
1647 endquoted:
1648 state.quoted = 1;
1649 done:
1650 state.depth = depth;
1651 state.mindepth = mindepth;
1652 state.thislevelstart = curlevel->prev;
1653 state.prevlevelstart
1654 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1655 state.location = from;
1656 immediate_quit = 0;
1657
1658 *stateptr = state;
1659 }
1660
1661 /* This comment supplies the doc string for parse-partial-sexp,
1662 for make-docfile to see. We cannot put this in the real DEFUN
1663 due to limits in the Unix cpp.
1664
1665 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1666 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1667 Parsing stops at TO or when certain criteria are met;\n\
1668 point is set to where parsing stops.\n\
1669 If fifth arg STATE is omitted or nil,\n\
1670 parsing assumes that FROM is the beginning of a function.\n\
1671 Value is a list of eight elements describing final state of parsing:\n\
1672 0. depth in parens.\n\
1673 1. character address of start of innermost containing list; nil if none.\n\
1674 2. character address of start of last complete sexp terminated.\n\
1675 3. non-nil if inside a string.\n\
1676 (it is the character that will terminate the string.)\n\
1677 4. t if inside a comment.\n\
1678 5. t if following a quote character.\n\
1679 6. the minimum paren-depth encountered during this scan.\n\
1680 7. t if in a comment of style `b'.\n\
1681 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1682 in parentheses becomes equal to TARGETDEPTH.\n\
1683 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1684 any character that starts a sexp.\n\
1685 Fifth arg STATE is an eight-list like what this function returns.\n\
It is used to initialize the state of the parse. Its second and third\n\
elements are ignored.\n\
Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.")
1689 (from, to, targetdepth, stopbefore, state, commentstop)
1690 */
1691
1692 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
1693 0 /* See immediately above */)
1694 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1695 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
1696 {
1697 struct lisp_parse_state state;
1698 int target;
1699
1700 if (!NILP (targetdepth))
1701 {
1702 CHECK_NUMBER (targetdepth, 3);
1703 target = XINT (targetdepth);
1704 }
1705 else
1706 target = -100000; /* We won't reach this depth */
1707
1708 validate_region (&from, &to);
1709 scan_sexps_forward (&state, XINT (from), XINT (to),
1710 target, !NILP (stopbefore), oldstate,
1711 !NILP (commentstop));
1712
1713 SET_PT (state.location);
1714
1715 return Fcons (make_number (state.depth),
1716 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1717 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1718 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1719 Fcons (state.incomment ? Qt : Qnil,
1720 Fcons (state.quoted ? Qt : Qnil,
1721 Fcons (make_number (state.mindepth),
1722 Fcons (state.comstyle ? Qt : Qnil,
1723 Qnil))))))));
1724 }
1725 \f
1726 init_syntax_once ()
1727 {
1728 register int i;
1729 Lisp_Object temp;
1730
1731 /* This has to be done here, before we call Fmake_char_table. */
1732 Qsyntax_table = intern ("syntax-table");
1733 staticpro (&Qsyntax_table);
1734
1735 /* Intern this now in case it isn't already done.
1736 Setting this variable twice is harmless.
1737 But don't staticpro it here--that is done in alloc.c. */
1738 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1739
1740 /* Now we are ready to set up this property, so we can
1741 create syntax tables. */
1742 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
1743
1744 temp = Fcons (make_number ((int) Swhitespace), Qnil);
1745
1746 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
1747
1748 temp = Fcons (make_number ((int) Sword), Qnil);
1749 for (i = 'a'; i <= 'z'; i++)
1750 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1751 for (i = 'A'; i <= 'Z'; i++)
1752 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1753 for (i = '0'; i <= '9'; i++)
1754 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1755
1756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
1757 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
1758
1759 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
1760 Fcons (make_number (Sopen), make_number (')')));
1761 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
1762 Fcons (make_number (Sclose), make_number ('(')));
1763 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
1764 Fcons (make_number (Sopen), make_number (']')));
1765 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
1766 Fcons (make_number (Sclose), make_number ('[')));
1767 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
1768 Fcons (make_number (Sopen), make_number ('}')));
1769 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
1770 Fcons (make_number (Sclose), make_number ('{')));
1771 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
1772 Fcons (make_number ((int) Sstring), Qnil));
1773 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
1774 Fcons (make_number ((int) Sescape), Qnil));
1775
1776 temp = Fcons (make_number ((int) Ssymbol), Qnil);
1777 for (i = 0; i < 10; i++)
1778 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, "_-+*/&|<>="[i], temp);
1779
1780 temp = Fcons (make_number ((int) Spunct), Qnil);
1781 for (i = 0; i < 12; i++)
1782 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ".,;:?!#@~^'`"[i], temp);
1783 }
1784
1785 syms_of_syntax ()
1786 {
1787 Qsyntax_table_p = intern ("syntax-table-p");
1788 staticpro (&Qsyntax_table_p);
1789
1790 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1791 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1792
1793 words_include_escapes = 0;
1794 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1795 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1796
1797 defsubr (&Ssyntax_table_p);
1798 defsubr (&Ssyntax_table);
1799 defsubr (&Sstandard_syntax_table);
1800 defsubr (&Scopy_syntax_table);
1801 defsubr (&Sset_syntax_table);
1802 defsubr (&Schar_syntax);
1803 defsubr (&Smatching_paren);
1804 defsubr (&Smodify_syntax_entry);
1805 defsubr (&Sdescribe_syntax);
1806
1807 defsubr (&Sforward_word);
1808
1809 defsubr (&Sforward_comment);
1810 defsubr (&Sscan_lists);
1811 defsubr (&Sscan_sexps);
1812 defsubr (&Sbackward_prefix_chars);
1813 defsubr (&Sparse_partial_sexp);
1814 }