Initial revision
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 #include "config.h"
22 #include <ctype.h>
23 #include "lisp.h"
24 #include "commands.h"
25 #include "buffer.h"
26 #include "syntax.h"
27
28 Lisp_Object Qsyntax_table_p;
29
30 int words_include_escapes;
31
32 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
33 "Return t if ARG is a syntax table.\n\
34 Any vector of 256 elements will do.")
35 (obj)
36 Lisp_Object obj;
37 {
38 if (XTYPE (obj) == Lisp_Vector && XVECTOR (obj)->size == 0400)
39 return Qt;
40 return Qnil;
41 }
42
43 Lisp_Object
44 check_syntax_table (obj)
45 Lisp_Object obj;
46 {
47 register Lisp_Object tem;
48 while (tem = Fsyntax_table_p (obj),
49 NULL (tem))
50 obj = wrong_type_argument (Qsyntax_table_p, obj, 0);
51 return obj;
52 }
53
54
55 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
56 "Return the current syntax table.\n\
57 This is the one specified by the current buffer.")
58 ()
59 {
60 return current_buffer->syntax_table;
61 }
62
63 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
64 Sstandard_syntax_table, 0, 0, 0,
65 "Return the standard syntax table.\n\
66 This is the one used for new buffers.")
67 ()
68 {
69 return Vstandard_syntax_table;
70 }
71
72 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
73 "Construct a new syntax table and return it.\n\
74 It is a copy of the TABLE, which defaults to the standard syntax table.")
75 (table)
76 Lisp_Object table;
77 {
78 Lisp_Object size, val;
79 XFASTINT (size) = 0400;
80 XFASTINT (val) = 0;
81 val = Fmake_vector (size, val);
82 if (!NULL (table))
83 table = check_syntax_table (table);
84 else if (NULL (Vstandard_syntax_table))
85 /* Can only be null during initialization */
86 return val;
87 else table = Vstandard_syntax_table;
88
89 bcopy (XVECTOR (table)->contents,
90 XVECTOR (val)->contents, 0400 * sizeof (Lisp_Object));
91 return val;
92 }
93
94 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
95 "Select a new syntax table for the current buffer.\n\
96 One argument, a syntax table.")
97 (table)
98 Lisp_Object table;
99 {
100 table = check_syntax_table (table);
101 current_buffer->syntax_table = table;
102 /* Indicate that this buffer now has a specified syntax table. */
103 current_buffer->local_var_flags |= buffer_local_flags.syntax_table;
104 return table;
105 }
106 \f
107 /* Convert a letter which signifies a syntax code
108 into the code it signifies.
109 This is used by modify-syntax-entry, and other things. */
110
111 unsigned char syntax_spec_code[0400] =
112 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
113 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
114 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
115 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
116 (char) Swhitespace, 0377, (char) Sstring, 0377,
117 (char) Smath, 0377, 0377, (char) Squote,
118 (char) Sopen, (char) Sclose, 0377, 0377,
119 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
120 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
121 0377, 0377, 0377, 0377,
122 (char) Scomment, 0377, (char) Sendcomment, 0377,
123 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A, ... */
124 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
125 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
126 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
127 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
128 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
129 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
130 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
131 };
132
133 /* Indexed by syntax code, give the letter that describes it. */
134
135 char syntax_code_spec[13] =
136 {
137 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>'
138 };
139 \f
140 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
141 "Return the syntax code of CHAR, described by a character.\n\
142 For example, if CHAR is a word constituent, the character `?w' is returned.\n\
143 The characters that correspond to various syntax codes\n\
144 are listed in the documentation of `modify-syntax-entry'.")
145 (ch)
146 Lisp_Object ch;
147 {
148 CHECK_NUMBER (ch, 0);
149 return make_number (syntax_code_spec[(int) SYNTAX (0xFF & XINT (ch))]);
150 }
151
152 /* This comment supplies the doc string for modify-syntax-entry,
153 for make-docfile to see. We cannot put this in the real DEFUN
154 due to limits in the Unix cpp.
155
156 DEFUN ("modify-syntax-entry", foo, bar, 0, 0, 0,
157 "Set syntax for character CHAR according to string S.\n\
158 The syntax is changed only for table TABLE, which defaults to\n\
159 the current buffer's syntax table.\n\
160 The first character of S should be one of the following:\n\
161 Space whitespace syntax. w word constituent.\n\
162 _ symbol constituent. . punctuation.\n\
163 ( open-parenthesis. ) close-parenthesis.\n\
164 \" string quote. \\ character-quote.\n\
165 $ paired delimiter. ' expression quote or prefix operator.\n\
166 < comment starter. > comment ender.\n\
167 Only single-character comment start and end sequences are represented thus.\n\
168 Two-character sequences are represented as described below.\n\
169 The second character of S is the matching parenthesis,\n\
170 used only if the first character is `(' or `)'.\n\
171 Any additional characters are flags.\n\
172 Defined flags are the characters 1, 2, 3, 4, and p.\n\
173 1 means C is the start of a two-char comment start sequence.\n\
174 2 means C is the second character of such a sequence.\n\
175 3 means C is the start of a two-char comment end sequence.\n\
176 4 means C is the second character of such a sequence.\n\
177 p means C is a prefix character for `backward-prefix-chars';
178 such characters are treated as whitespace when they occur
179 between expressions.")
180
181 */
182
183 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
184 /* I really don't know why this is interactive
185 help-form should at least be made useful whilst reading the second arg
186 */
187 "cSet syntax for character: \nsSet syntax for %s to: ",
188 0 /* See immediately above */)
189 (c, newentry, syntax_table)
190 Lisp_Object c, newentry, syntax_table;
191 {
192 register unsigned char *p, match;
193 register enum syntaxcode code;
194 Lisp_Object val;
195
196 CHECK_NUMBER (c, 0);
197 CHECK_STRING (newentry, 1);
198 if (NULL (syntax_table))
199 syntax_table = current_buffer->syntax_table;
200 else
201 syntax_table = check_syntax_table (syntax_table);
202
203 p = XSTRING (newentry)->data;
204 code = (enum syntaxcode) syntax_spec_code[*p++];
205 if (((int) code & 0377) == 0377)
206 error ("invalid syntax description letter: %c", c);
207
208 match = *p;
209 if (match) p++;
210 if (match == ' ') match = 0;
211
212 XFASTINT (val) = (match << 8) + (int) code;
213 while (*p)
214 switch (*p++)
215 {
216 case '1':
217 XFASTINT (val) |= 1 << 16;
218 break;
219
220 case '2':
221 XFASTINT (val) |= 1 << 17;
222 break;
223
224 case '3':
225 XFASTINT (val) |= 1 << 18;
226 break;
227
228 case '4':
229 XFASTINT (val) |= 1 << 19;
230 break;
231
232 case 'p':
233 XFASTINT (val) |= 1 << 20;
234 break;
235 }
236
237 XVECTOR (syntax_table)->contents[0xFF & XINT (c)] = val;
238
239 return Qnil;
240 }
241 \f
242 /* Dump syntax table to buffer in human-readable format */
243
244 describe_syntax (value)
245 Lisp_Object value;
246 {
247 register enum syntaxcode code;
248 char desc, match, start1, start2, end1, end2, prefix;
249 char str[2];
250
251 Findent_to (make_number (16), make_number (1));
252
253 if (XTYPE (value) != Lisp_Int)
254 {
255 insert_string ("invalid");
256 return;
257 }
258
259 code = (enum syntaxcode) (XINT (value) & 0377);
260 match = (XINT (value) >> 8) & 0377;
261 start1 = (XINT (value) >> 16) & 1;
262 start2 = (XINT (value) >> 17) & 1;
263 end1 = (XINT (value) >> 18) & 1;
264 end2 = (XINT (value) >> 19) & 1;
265 prefix = (XINT (value) >> 20) & 1;
266
267 if ((int) code < 0 || (int) code >= (int) Smax)
268 {
269 insert_string ("invalid");
270 return;
271 }
272 desc = syntax_code_spec[(int) code];
273
274 str[0] = desc, str[1] = 0;
275 insert (str, 1);
276
277 str[0] = match ? match : ' ';
278 insert (str, 1);
279
280
281 if (start1)
282 insert ("1", 1);
283 if (start2)
284 insert ("2", 1);
285
286 if (end1)
287 insert ("3", 1);
288 if (end2)
289 insert ("4", 1);
290
291 if (prefix)
292 insert ("p", 1);
293
294 insert_string ("\twhich means: ");
295
296 #ifdef SWITCH_ENUM_BUG
297 switch ((int) code)
298 #else
299 switch (code)
300 #endif
301 {
302 case Swhitespace:
303 insert_string ("whitespace"); break;
304 case Spunct:
305 insert_string ("punctuation"); break;
306 case Sword:
307 insert_string ("word"); break;
308 case Ssymbol:
309 insert_string ("symbol"); break;
310 case Sopen:
311 insert_string ("open"); break;
312 case Sclose:
313 insert_string ("close"); break;
314 case Squote:
315 insert_string ("quote"); break;
316 case Sstring:
317 insert_string ("string"); break;
318 case Smath:
319 insert_string ("math"); break;
320 case Sescape:
321 insert_string ("escape"); break;
322 case Scharquote:
323 insert_string ("charquote"); break;
324 case Scomment:
325 insert_string ("comment"); break;
326 case Sendcomment:
327 insert_string ("endcomment"); break;
328 default:
329 insert_string ("invalid");
330 return;
331 }
332
333 if (match)
334 {
335 insert_string (", matches ");
336
337 str[0] = match, str[1] = 0;
338 insert (str, 1);
339 }
340
341 if (start1)
342 insert_string (",\n\t is the first character of a comment-start sequence");
343 if (start2)
344 insert_string (",\n\t is the second character of a comment-start sequence");
345
346 if (end1)
347 insert_string (",\n\t is the first character of a comment-end sequence");
348 if (end2)
349 insert_string (",\n\t is the second character of a comment-end sequence");
350 if (prefix)
351 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
352
353 insert_string ("\n");
354 }
355
356 Lisp_Object
357 describe_syntax_1 (vector)
358 Lisp_Object vector;
359 {
360 struct buffer *old = current_buffer;
361 set_buffer_internal (XBUFFER (Vstandard_output));
362 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
363 set_buffer_internal (old);
364 return Qnil;
365 }
366
367 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
368 "Describe the syntax specifications in the syntax table.\n\
369 The descriptions are inserted in a buffer, which is then displayed.")
370 ()
371 {
372 internal_with_output_to_temp_buffer
373 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
374
375 return Qnil;
376 }
377 \f
378 /* Return the position across COUNT words from FROM.
379 If that many words cannot be found before the end of the buffer, return 0.
380 COUNT negative means scan backward and stop at word beginning. */
381
382 scan_words (from, count)
383 register int from, count;
384 {
385 register int beg = BEGV;
386 register int end = ZV;
387 register int code;
388
389 immediate_quit = 1;
390 QUIT;
391
392 while (count > 0)
393 {
394 while (1)
395 {
396 if (from == end)
397 {
398 immediate_quit = 0;
399 return 0;
400 }
401 code = SYNTAX (FETCH_CHAR (from));
402 if (words_include_escapes
403 && (code == Sescape || code == Scharquote))
404 break;
405 if (code == Sword)
406 break;
407 from++;
408 }
409 while (1)
410 {
411 if (from == end) break;
412 code = SYNTAX (FETCH_CHAR (from));
413 if (!(words_include_escapes
414 && (code == Sescape || code == Scharquote)))
415 if (code != Sword)
416 break;
417 from++;
418 }
419 count--;
420 }
421 while (count < 0)
422 {
423 while (1)
424 {
425 if (from == beg)
426 {
427 immediate_quit = 0;
428 return 0;
429 }
430 code = SYNTAX (FETCH_CHAR (from - 1));
431 if (words_include_escapes
432 && (code == Sescape || code == Scharquote))
433 break;
434 if (code == Sword)
435 break;
436 from--;
437 }
438 while (1)
439 {
440 if (from == beg) break;
441 code = SYNTAX (FETCH_CHAR (from - 1));
442 if (!(words_include_escapes
443 && (code == Sescape || code == Scharquote)))
444 if (code != Sword)
445 break;
446 from--;
447 }
448 count++;
449 }
450
451 immediate_quit = 0;
452
453 return from;
454 }
455
456 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
457 "Move point forward ARG words (backward if ARG is negative).\n\
458 Normally returns t.\n\
459 If an edge of the buffer is reached, point is left there\n\
460 and nil is returned.")
461 (count)
462 Lisp_Object count;
463 {
464 int val;
465 CHECK_NUMBER (count, 0);
466
467 if (!(val = scan_words (point, XINT (count))))
468 {
469 SET_PT (XINT (count) > 0 ? ZV : BEGV);
470 return Qnil;
471 }
472 SET_PT (val);
473 return Qt;
474 }
475 \f
476 int parse_sexp_ignore_comments;
477
478 Lisp_Object
479 scan_lists (from, count, depth, sexpflag)
480 register int from;
481 int count, depth, sexpflag;
482 {
483 Lisp_Object val;
484 register int stop;
485 register int c;
486 char stringterm;
487 int quoted;
488 int mathexit = 0;
489 register enum syntaxcode code;
490 int min_depth = depth; /* Err out if depth gets less than this. */
491
492 if (depth > 0) min_depth = 0;
493
494 immediate_quit = 1;
495 QUIT;
496
497 while (count > 0)
498 {
499 stop = ZV;
500 while (from < stop)
501 {
502 c = FETCH_CHAR (from);
503 code = SYNTAX(c);
504 from++;
505 if (from < stop && SYNTAX_COMSTART_FIRST (c)
506 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
507 && parse_sexp_ignore_comments)
508 code = Scomment, from++;
509 if (SYNTAX_PREFIX (c))
510 continue;
511
512 #ifdef SWITCH_ENUM_BUG
513 switch ((int) code)
514 #else
515 switch (code)
516 #endif
517 {
518 case Sescape:
519 case Scharquote:
520 if (from == stop) goto lose;
521 from++;
522 /* treat following character as a word constituent */
523 case Sword:
524 case Ssymbol:
525 if (depth || !sexpflag) break;
526 /* This word counts as a sexp; return at end of it. */
527 while (from < stop)
528 {
529 #ifdef SWITCH_ENUM_BUG
530 switch ((int) SYNTAX(FETCH_CHAR (from)))
531 #else
532 switch (SYNTAX(FETCH_CHAR (from)))
533 #endif
534 {
535 case Scharquote:
536 case Sescape:
537 from++;
538 if (from == stop) goto lose;
539 break;
540 case Sword:
541 case Ssymbol:
542 case Squote:
543 break;
544 default:
545 goto done;
546 }
547 from++;
548 }
549 goto done;
550
551 case Scomment:
552 if (!parse_sexp_ignore_comments) break;
553 while (1)
554 {
555 if (from == stop) goto done;
556 if (SYNTAX (c = FETCH_CHAR (from)) == Sendcomment)
557 break;
558 from++;
559 if (from < stop && SYNTAX_COMEND_FIRST (c)
560 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from)))
561 { from++; break; }
562 }
563 break;
564
565 case Smath:
566 if (!sexpflag)
567 break;
568 if (from != stop && c == FETCH_CHAR (from))
569 from++;
570 if (mathexit)
571 {
572 mathexit = 0;
573 goto close1;
574 }
575 mathexit = 1;
576
577 case Sopen:
578 if (!++depth) goto done;
579 break;
580
581 case Sclose:
582 close1:
583 if (!--depth) goto done;
584 if (depth < min_depth)
585 error ("Containing expression ends prematurely");
586 break;
587
588 case Sstring:
589 stringterm = FETCH_CHAR (from - 1);
590 while (1)
591 {
592 if (from >= stop) goto lose;
593 if (FETCH_CHAR (from) == stringterm) break;
594 #ifdef SWITCH_ENUM_BUG
595 switch ((int) SYNTAX(FETCH_CHAR (from)))
596 #else
597 switch (SYNTAX(FETCH_CHAR (from)))
598 #endif
599 {
600 case Scharquote:
601 case Sescape:
602 from++;
603 }
604 from++;
605 }
606 from++;
607 if (!depth && sexpflag) goto done;
608 break;
609 }
610 }
611
612 /* Reached end of buffer. Error if within object, return nil if between */
613 if (depth) goto lose;
614
615 immediate_quit = 0;
616 return Qnil;
617
618 /* End of object reached */
619 done:
620 count--;
621 }
622
623 while (count < 0)
624 {
625 stop = BEGV;
626 while (from > stop)
627 {
628 from--;
629 if (quoted = char_quoted (from))
630 from--;
631 c = FETCH_CHAR (from);
632 code = SYNTAX (c);
633 if (from > stop && SYNTAX_COMEND_SECOND (c)
634 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
635 && !char_quoted (from - 1)
636 && parse_sexp_ignore_comments)
637 code = Sendcomment, from--;
638 if (SYNTAX_PREFIX (c))
639 continue;
640
641 #ifdef SWITCH_ENUM_BUG
642 switch ((int) (quoted ? Sword : code))
643 #else
644 switch (quoted ? Sword : code)
645 #endif
646 {
647 case Sword:
648 case Ssymbol:
649 if (depth || !sexpflag) break;
650 /* This word counts as a sexp; count object finished after passing it. */
651 while (from > stop)
652 {
653 quoted = char_quoted (from - 1);
654 if (quoted)
655 from--;
656 if (! (quoted || SYNTAX(FETCH_CHAR (from - 1)) == Sword
657 || SYNTAX(FETCH_CHAR (from - 1)) == Ssymbol
658 || SYNTAX(FETCH_CHAR (from - 1)) == Squote))
659 goto done2;
660 from--;
661 }
662 goto done2;
663
664 case Smath:
665 if (!sexpflag)
666 break;
667 if (from != stop && c == FETCH_CHAR (from - 1))
668 from--;
669 if (mathexit)
670 {
671 mathexit = 0;
672 goto open2;
673 }
674 mathexit = 1;
675
676 case Sclose:
677 if (!++depth) goto done2;
678 break;
679
680 case Sopen:
681 open2:
682 if (!--depth) goto done2;
683 if (depth < min_depth)
684 error ("Containing expression ends prematurely");
685 break;
686
687 case Sendcomment:
688 if (!parse_sexp_ignore_comments)
689 break;
690 /* Look back, counting the parity of string-quotes,
691 and recording the comment-starters seen.
692 When we reach a safe place, assume that's not in a string;
693 then step the main scan to the earliest comment-starter seen
694 an even number of string quotes away from the safe place.
695
696 OFROM[I] is position of the earliest comment-starter seen
697 which is I+2X quotes from the comment-end.
698 PARITY is current parity of quotes from the comment end. */
699 {
700 int ofrom[2];
701 int parity = 0;
702
703 ofrom[0] = ofrom[1] = from;
704
705 /* At beginning of range to scan, we're outside of strings;
706 that determines quote parity to the comment-end. */
707 while (from != stop)
708 {
709 /* Move back and examine a character. */
710 from--;
711
712 c = FETCH_CHAR (from);
713 code = SYNTAX (c);
714
715 /* If this char is the second of a 2-char comment sequence,
716 back up and give the pair the appropriate syntax. */
717 if (from > stop && SYNTAX_COMEND_SECOND (c)
718 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
719 code = Sendcomment, from--;
720 else if (from > stop && SYNTAX_COMSTART_SECOND (c)
721 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1)))
722 code = Scomment, from--;
723
724 /* Ignore escaped characters. */
725 if (char_quoted (from))
726 continue;
727
728 /* Track parity of quotes between here and comment-end. */
729 if (code == Sstring)
730 parity ^= 1;
731
732 /* Record comment-starters according to that
733 quote-parity to the comment-end. */
734 if (code == Scomment)
735 ofrom[parity] = from;
736
737 /* If we come to another comment-end,
738 assume it's not inside a string.
739 That determines the quote parity to the comment-end. */
740 if (code == Sendcomment)
741 break;
742 }
743 from = ofrom[parity];
744 }
745 break;
746
747 case Sstring:
748 stringterm = FETCH_CHAR (from);
749 while (1)
750 {
751 if (from == stop) goto lose;
752 if (!char_quoted (from - 1)
753 && stringterm == FETCH_CHAR (from - 1))
754 break;
755 from--;
756 }
757 from--;
758 if (!depth && sexpflag) goto done2;
759 break;
760 }
761 }
762
763 /* Reached start of buffer. Error if within object, return nil if between */
764 if (depth) goto lose;
765
766 immediate_quit = 0;
767 return Qnil;
768
769 done2:
770 count++;
771 }
772
773
774 immediate_quit = 0;
775 XFASTINT (val) = from;
776 return val;
777
778 lose:
779 error ("Unbalanced parentheses");
780 /* NOTREACHED */
781 }
782
783 char_quoted (pos)
784 register int pos;
785 {
786 register enum syntaxcode code;
787 register int beg = BEGV;
788 register int quoted = 0;
789
790 while (pos > beg
791 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
792 || code == Sescape))
793 pos--, quoted = !quoted;
794 return quoted;
795 }
796
797 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
798 "Scan from character number FROM by COUNT lists.\n\
799 Returns the character number of the position thus found.\n\
800 \n\
801 If DEPTH is nonzero, paren depth begins counting from that value,\n\
802 only places where the depth in parentheses becomes zero\n\
803 are candidates for stopping; COUNT such places are counted.\n\
804 Thus, a positive value for DEPTH means go out levels.\n\
805 \n\
806 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
807 \n\
808 If the beginning or end of (the accessible part of) the buffer is reached\n\
809 and the depth is wrong, an error is signaled.\n\
810 If the depth is right but the count is not used up, nil is returned.")
811 (from, count, depth)
812 Lisp_Object from, count, depth;
813 {
814 CHECK_NUMBER (from, 0);
815 CHECK_NUMBER (count, 1);
816 CHECK_NUMBER (depth, 2);
817
818 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
819 }
820
821 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
822 "Scan from character number FROM by COUNT balanced expressions.\n\
823 If COUNT is negative, scan backwards.\n\
824 Returns the character number of the position thus found.\n\
825 \n\
826 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
827 \n\
828 If the beginning or end of (the accessible part of) the buffer is reached\n\
829 in the middle of a parenthetical grouping, an error is signaled.\n\
830 If the beginning or end is reached between groupings\n\
831 but before count is used up, nil is returned.")
832 (from, count)
833 Lisp_Object from, count;
834 {
835 CHECK_NUMBER (from, 0);
836 CHECK_NUMBER (count, 1);
837
838 return scan_lists (XINT (from), XINT (count), 0, 1);
839 }
840
841 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
842 0, 0, 0,
843 "Move point backward over any number of chars with prefix syntax.\n\
844 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
845 ()
846 {
847 int beg = BEGV;
848 int pos = point;
849
850 while (pos > beg && !char_quoted (pos - 1)
851 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
852 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
853 pos--;
854
855 SET_PT (pos);
856
857 return Qnil;
858 }
859 \f
860 struct lisp_parse_state
861 {
862 int depth; /* Depth at end of parsing */
863 int instring; /* -1 if not within string, else desired terminator. */
864 int incomment; /* Nonzero if within a comment at end of parsing */
865 int quoted; /* Nonzero if just after an escape char at end of parsing */
866 int thislevelstart; /* Char number of most recent start-of-expression at current level */
867 int prevlevelstart; /* Char number of start of containing expression */
868 int location; /* Char number at which parsing stopped. */
869 int mindepth; /* Minimum depth seen while scanning. */
870 };
871
872 /* Parse forward from FROM to END,
873 assuming that FROM is the start of a function,
874 and return a description of the state of the parse at END. */
875
876 struct lisp_parse_state val_scan_sexps_forward;
877
878 struct lisp_parse_state *
879 scan_sexps_forward (from, end, targetdepth, stopbefore, oldstate)
880 register int from;
881 int end, targetdepth, stopbefore;
882 Lisp_Object oldstate;
883 {
884 struct lisp_parse_state state;
885
886 register enum syntaxcode code;
887 struct level { int last, prev; };
888 struct level levelstart[100];
889 register struct level *curlevel = levelstart;
890 struct level *endlevel = levelstart + 100;
891 char prev;
892 register int depth; /* Paren depth of current scanning location.
893 level - levelstart equals this except
894 when the depth becomes negative. */
895 int mindepth; /* Lowest DEPTH value seen. */
896 int start_quoted = 0; /* Nonzero means starting after a char quote */
897 Lisp_Object tem;
898
899 immediate_quit = 1;
900 QUIT;
901
902 if (NULL (oldstate))
903 {
904 depth = 0;
905 state.instring = -1;
906 state.incomment = 0;
907 }
908 else
909 {
910 tem = Fcar (oldstate);
911 if (!NULL (tem))
912 depth = XINT (tem);
913 else
914 depth = 0;
915
916 oldstate = Fcdr (oldstate);
917 oldstate = Fcdr (oldstate);
918 oldstate = Fcdr (oldstate);
919 tem = Fcar (oldstate);
920 state.instring = !NULL (tem) ? XINT (tem) : -1;
921
922 oldstate = Fcdr (oldstate);
923 tem = Fcar (oldstate);
924 state.incomment = !NULL (tem);
925
926 oldstate = Fcdr (oldstate);
927 tem = Fcar (oldstate);
928 start_quoted = !NULL (tem);
929 }
930 state.quoted = 0;
931 mindepth = depth;
932
933 curlevel->prev = -1;
934 curlevel->last = -1;
935
936 /* Enter the loop at a place appropriate for initial state. */
937
938 if (state.incomment) goto startincomment;
939 if (state.instring >= 0)
940 {
941 if (start_quoted) goto startquotedinstring;
942 goto startinstring;
943 }
944 if (start_quoted) goto startquoted;
945
946 while (from < end)
947 {
948 code = SYNTAX(FETCH_CHAR (from));
949 from++;
950 if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
951 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
952 code = Scomment, from++;
953 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
954 continue;
955 #ifdef SWITCH_ENUM_BUG
956 switch ((int) code)
957 #else
958 switch (code)
959 #endif
960 {
961 case Sescape:
962 case Scharquote:
963 if (stopbefore) goto stop; /* this arg means stop at sexp start */
964 curlevel->last = from - 1;
965 startquoted:
966 if (from == end) goto endquoted;
967 from++;
968 goto symstarted;
969 /* treat following character as a word constituent */
970 case Sword:
971 case Ssymbol:
972 if (stopbefore) goto stop; /* this arg means stop at sexp start */
973 curlevel->last = from - 1;
974 symstarted:
975 while (from < end)
976 {
977 #ifdef SWITCH_ENUM_BUG
978 switch ((int) SYNTAX(FETCH_CHAR (from)))
979 #else
980 switch (SYNTAX(FETCH_CHAR (from)))
981 #endif
982 {
983 case Scharquote:
984 case Sescape:
985 from++;
986 if (from == end) goto endquoted;
987 break;
988 case Sword:
989 case Ssymbol:
990 case Squote:
991 break;
992 default:
993 goto symdone;
994 }
995 from++;
996 }
997 symdone:
998 curlevel->prev = curlevel->last;
999 break;
1000
1001 case Scomment:
1002 state.incomment = 1;
1003 startincomment:
1004 while (1)
1005 {
1006 if (from == end) goto done;
1007 if (SYNTAX (prev = FETCH_CHAR (from)) == Sendcomment)
1008 break;
1009 from++;
1010 if (from < end && SYNTAX_COMEND_FIRST (prev)
1011 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from)))
1012 { from++; break; }
1013 }
1014 state.incomment = 0;
1015 break;
1016
1017 case Sopen:
1018 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1019 depth++;
1020 /* curlevel++->last ran into compiler bug on Apollo */
1021 curlevel->last = from - 1;
1022 if (++curlevel == endlevel)
1023 error ("Nesting too deep for parser");
1024 curlevel->prev = -1;
1025 curlevel->last = -1;
1026 if (!--targetdepth) goto done;
1027 break;
1028
1029 case Sclose:
1030 depth--;
1031 if (depth < mindepth)
1032 mindepth = depth;
1033 if (curlevel != levelstart)
1034 curlevel--;
1035 curlevel->prev = curlevel->last;
1036 if (!++targetdepth) goto done;
1037 break;
1038
1039 case Sstring:
1040 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1041 curlevel->last = from - 1;
1042 state.instring = FETCH_CHAR (from - 1);
1043 startinstring:
1044 while (1)
1045 {
1046 if (from >= end) goto done;
1047 if (FETCH_CHAR (from) == state.instring) break;
1048 #ifdef SWITCH_ENUM_BUG
1049 switch ((int) SYNTAX(FETCH_CHAR (from)))
1050 #else
1051 switch (SYNTAX(FETCH_CHAR (from)))
1052 #endif
1053 {
1054 case Scharquote:
1055 case Sescape:
1056 from++;
1057 startquotedinstring:
1058 if (from >= end) goto endquoted;
1059 }
1060 from++;
1061 }
1062 state.instring = -1;
1063 curlevel->prev = curlevel->last;
1064 from++;
1065 break;
1066
1067 case Smath:
1068 break;
1069 }
1070 }
1071 goto done;
1072
1073 stop: /* Here if stopping before start of sexp. */
1074 from--; /* We have just fetched the char that starts it; */
1075 goto done; /* but return the position before it. */
1076
1077 endquoted:
1078 state.quoted = 1;
1079 done:
1080 state.depth = depth;
1081 state.mindepth = mindepth;
1082 state.thislevelstart = curlevel->prev;
1083 state.prevlevelstart
1084 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1085 state.location = from;
1086 immediate_quit = 0;
1087
1088 val_scan_sexps_forward = state;
1089 return &val_scan_sexps_forward;
1090 }
1091
1092 /* This comment supplies the doc string for parse-partial-sexp,
1093 for make-docfile to see. We cannot put this in the real DEFUN
1094 due to limits in the Unix cpp.
1095
1096 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 0, 0, 0,
1097 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1098 Parsing stops at TO or when certain criteria are met;\n\
1099 point is set to where parsing stops.\n\
1100 If fifth arg STATE is omitted or nil,\n\
1101 parsing assumes that FROM is the beginning of a function.\n\
1102 Value is a list of seven elements describing final state of parsing:\n\
1103 1. depth in parens.\n\
1104 2. character address of start of innermost containing list; nil if none.\n\
1105 3. character address of start of last complete sexp terminated.\n\
1106 4. non-nil if inside a string.\n\
1107 (it is the character that will terminate the string.)\n\
1108 5. t if inside a comment.\n\
1109 6. t if following a quote character.\n\
1110 7. the minimum paren-depth encountered during this scan.\n\
1111 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1112 in parentheses becomes equal to TARGETDEPTH.\n\
1113 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1114 any character that starts a sexp.\n\
1115 Fifth arg STATE is a seven-list like what this function returns.\n\
1116 It is used to initialize the state of the parse.")
1117
1118 */
1119
1120 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 5, 0,
1121 0 /* See immediately above */)
1122 (from, to, targetdepth, stopbefore, oldstate)
1123 Lisp_Object from, to, targetdepth, stopbefore, oldstate;
1124 {
1125 struct lisp_parse_state state;
1126 int target;
1127
1128 if (!NULL (targetdepth))
1129 {
1130 CHECK_NUMBER (targetdepth, 3);
1131 target = XINT (targetdepth);
1132 }
1133 else
1134 target = -100000; /* We won't reach this depth */
1135
1136 validate_region (&from, &to);
1137 state = *scan_sexps_forward (XINT (from), XINT (to),
1138 target, !NULL (stopbefore), oldstate);
1139
1140 SET_PT (state.location);
1141
1142 return Fcons (make_number (state.depth),
1143 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1144 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1145 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1146 Fcons (state.incomment ? Qt : Qnil,
1147 Fcons (state.quoted ? Qt : Qnil,
1148 Fcons (make_number (state.mindepth), Qnil)))))));
1149 }
1150 \f
1151 init_syntax_once ()
1152 {
1153 register int i;
1154 register struct Lisp_Vector *v;
1155
1156 /* Set this now, so first buffer creation can refer to it. */
1157 /* Make it nil before calling copy-syntax-table
1158 so that copy-syntax-table will know not to try to copy from garbage */
1159 Vstandard_syntax_table = Qnil;
1160 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1161
1162 v = XVECTOR (Vstandard_syntax_table);
1163
1164 for (i = 'a'; i <= 'z'; i++)
1165 XFASTINT (v->contents[i]) = (int) Sword;
1166 for (i = 'A'; i <= 'Z'; i++)
1167 XFASTINT (v->contents[i]) = (int) Sword;
1168 for (i = '0'; i <= '9'; i++)
1169 XFASTINT (v->contents[i]) = (int) Sword;
1170 XFASTINT (v->contents['$']) = (int) Sword;
1171 XFASTINT (v->contents['%']) = (int) Sword;
1172
1173 XFASTINT (v->contents['(']) = (int) Sopen + (')' << 8);
1174 XFASTINT (v->contents[')']) = (int) Sclose + ('(' << 8);
1175 XFASTINT (v->contents['[']) = (int) Sopen + (']' << 8);
1176 XFASTINT (v->contents[']']) = (int) Sclose + ('[' << 8);
1177 XFASTINT (v->contents['{']) = (int) Sopen + ('}' << 8);
1178 XFASTINT (v->contents['}']) = (int) Sclose + ('{' << 8);
1179 XFASTINT (v->contents['"']) = (int) Sstring;
1180 XFASTINT (v->contents['\\']) = (int) Sescape;
1181
1182 for (i = 0; i < 10; i++)
1183 XFASTINT (v->contents["_-+*/&|<>="[i]]) = (int) Ssymbol;
1184
1185 for (i = 0; i < 12; i++)
1186 XFASTINT (v->contents[".,;:?!#@~^'`"[i]]) = (int) Spunct;
1187 }
1188
1189 syms_of_syntax ()
1190 {
1191 Qsyntax_table_p = intern ("syntax-table-p");
1192 staticpro (&Qsyntax_table_p);
1193
1194 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1195 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1196
1197 words_include_escapes = 0;
1198 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1199 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1200
1201 defsubr (&Ssyntax_table_p);
1202 defsubr (&Ssyntax_table);
1203 defsubr (&Sstandard_syntax_table);
1204 defsubr (&Scopy_syntax_table);
1205 defsubr (&Sset_syntax_table);
1206 defsubr (&Schar_syntax);
1207 defsubr (&Smodify_syntax_entry);
1208 defsubr (&Sdescribe_syntax);
1209
1210 defsubr (&Sforward_word);
1211
1212 defsubr (&Sscan_lists);
1213 defsubr (&Sscan_sexps);
1214 defsubr (&Sbackward_prefix_chars);
1215 defsubr (&Sparse_partial_sexp);
1216 }