(Vafter_change_functions, Vbefore_change_functions): Declared.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
c6c5df7f 2 Copyright (C) 1985, 1987, 1993 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
18160b98 21#include <config.h>
8489eb67
RS
22#include <ctype.h>
23#include "lisp.h"
24#include "commands.h"
25#include "buffer.h"
26#include "syntax.h"
27
28Lisp_Object Qsyntax_table_p;
29
340f92b5
RS
30static void scan_sexps_forward ();
31static int char_quoted ();
32
8489eb67
RS
33int words_include_escapes;
34
e5d4f4dc
RS
35/* This is the internal form of the parse state used in parse-partial-sexp. */
36
37struct lisp_parse_state
38 {
39 int depth; /* Depth at end of parsing */
40 int instring; /* -1 if not within string, else desired terminator. */
41 int incomment; /* Nonzero if within a comment at end of parsing */
42 int comstyle; /* comment style a=0, or b=1 */
43 int quoted; /* Nonzero if just after an escape char at end of parsing */
44 int thislevelstart; /* Char number of most recent start-of-expression at current level */
45 int prevlevelstart; /* Char number of start of containing expression */
46 int location; /* Char number at which parsing stopped. */
47 int mindepth; /* Minimum depth seen while scanning. */
48 int comstart; /* Position just after last comment starter. */
49 };
50\f
37bef230
RS
51/* These variables are a cache for finding the start of a defun.
52 find_start_pos is the place for which the defun start was found.
53 find_start_value is the defun start position found for it.
54 find_start_buffer is the buffer it was found in.
55 find_start_begv is the BEGV value when it was found.
56 find_start_modiff is the value of MODIFF when it was found. */
57
58static int find_start_pos;
59static int find_start_value;
60static struct buffer *find_start_buffer;
61static int find_start_begv;
62static int find_start_modiff;
63
64/* Find a defun-start that is the last one before POS (or nearly the last).
65 We record what we find, so that another call in the same area
66 can return the same value right away. */
67
68static int
69find_defun_start (pos)
70 int pos;
71{
72 int tem;
73 int shortage;
74
75 /* Use previous finding, if it's valid and applies to this inquiry. */
76 if (current_buffer == find_start_buffer
77 /* Reuse the defun-start even if POS is a little farther on.
78 POS might be in the next defun, but that's ok.
79 Our value may not be the best possible, but will still be usable. */
80 && pos <= find_start_pos + 1000
81 && pos >= find_start_value
82 && BEGV == find_start_begv
83 && MODIFF == find_start_modiff)
84 return find_start_value;
85
86 /* Back up to start of line. */
45eb8d9d 87 tem = scan_buffer ('\n', pos, -1, &shortage, 1);
37bef230
RS
88
89 while (tem > BEGV)
90 {
91 /* Open-paren at start of line means we found our defun-start. */
92 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
93 break;
94 /* Move to beg of previous line. */
45eb8d9d 95 tem = scan_buffer ('\n', tem, -2, &shortage, 1);
37bef230
RS
96 }
97
98 /* Record what we found, for the next try. */
99 find_start_value = tem;
100 find_start_buffer = current_buffer;
101 find_start_modiff = MODIFF;
102 find_start_begv = BEGV;
103 find_start_pos = pos;
104
105 return find_start_value;
106}
107\f
8489eb67
RS
108DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
109 "Return t if ARG is a syntax table.\n\
110Any vector of 256 elements will do.")
111 (obj)
112 Lisp_Object obj;
113{
114 if (XTYPE (obj) == Lisp_Vector && XVECTOR (obj)->size == 0400)
115 return Qt;
116 return Qnil;
117}
118
119Lisp_Object
120check_syntax_table (obj)
121 Lisp_Object obj;
122{
123 register Lisp_Object tem;
124 while (tem = Fsyntax_table_p (obj),
265a9e55 125 NILP (tem))
b3cfe0c8 126 obj = wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
127 return obj;
128}
129
130
131DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
132 "Return the current syntax table.\n\
133This is the one specified by the current buffer.")
134 ()
135{
136 return current_buffer->syntax_table;
137}
138
139DEFUN ("standard-syntax-table", Fstandard_syntax_table,
140 Sstandard_syntax_table, 0, 0, 0,
141 "Return the standard syntax table.\n\
142This is the one used for new buffers.")
143 ()
144{
145 return Vstandard_syntax_table;
146}
147
148DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
149 "Construct a new syntax table and return it.\n\
150It is a copy of the TABLE, which defaults to the standard syntax table.")
151 (table)
152 Lisp_Object table;
153{
154 Lisp_Object size, val;
155 XFASTINT (size) = 0400;
156 XFASTINT (val) = 0;
157 val = Fmake_vector (size, val);
265a9e55 158 if (!NILP (table))
8489eb67 159 table = check_syntax_table (table);
265a9e55 160 else if (NILP (Vstandard_syntax_table))
8489eb67
RS
161 /* Can only be null during initialization */
162 return val;
163 else table = Vstandard_syntax_table;
164
165 bcopy (XVECTOR (table)->contents,
166 XVECTOR (val)->contents, 0400 * sizeof (Lisp_Object));
167 return val;
168}
169
170DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
171 "Select a new syntax table for the current buffer.\n\
172One argument, a syntax table.")
173 (table)
174 Lisp_Object table;
175{
176 table = check_syntax_table (table);
177 current_buffer->syntax_table = table;
178 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
179 current_buffer->local_var_flags
180 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
181 return table;
182}
183\f
184/* Convert a letter which signifies a syntax code
185 into the code it signifies.
186 This is used by modify-syntax-entry, and other things. */
187
188unsigned char syntax_spec_code[0400] =
189 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
193 (char) Swhitespace, 0377, (char) Sstring, 0377,
194 (char) Smath, 0377, 0377, (char) Squote,
195 (char) Sopen, (char) Sclose, 0377, 0377,
196 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
197 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
198 0377, 0377, 0377, 0377,
199 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 200 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
203 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
207 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
208 };
209
210/* Indexed by syntax code, give the letter that describes it. */
211
6cb71bf6 212char syntax_code_spec[14] =
8489eb67 213 {
6cb71bf6 214 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
8489eb67
RS
215 };
216\f
217DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
218 "Return the syntax code of CHAR, described by a character.\n\
219For example, if CHAR is a word constituent, the character `?w' is returned.\n\
220The characters that correspond to various syntax codes\n\
221are listed in the documentation of `modify-syntax-entry'.")
222 (ch)
223 Lisp_Object ch;
224{
225 CHECK_NUMBER (ch, 0);
226 return make_number (syntax_code_spec[(int) SYNTAX (0xFF & XINT (ch))]);
227}
228
229/* This comment supplies the doc string for modify-syntax-entry,
230 for make-docfile to see. We cannot put this in the real DEFUN
231 due to limits in the Unix cpp.
232
2fcea188 233DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
234 "Set syntax for character CHAR according to string S.\n\
235The syntax is changed only for table TABLE, which defaults to\n\
236 the current buffer's syntax table.\n\
237The first character of S should be one of the following:\n\
32676c08
JB
238 Space or - whitespace syntax. w word constituent.\n\
239 _ symbol constituent. . punctuation.\n\
240 ( open-parenthesis. ) close-parenthesis.\n\
241 \" string quote. \\ escape.\n\
242 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 243 < comment starter. > comment ender.\n\
6cb71bf6 244 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 245\n\
8489eb67
RS
246Only single-character comment start and end sequences are represented thus.\n\
247Two-character sequences are represented as described below.\n\
248The second character of S is the matching parenthesis,\n\
249 used only if the first character is `(' or `)'.\n\
250Any additional characters are flags.\n\
e5d4f4dc 251Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
8489eb67
RS
252 1 means C is the start of a two-char comment start sequence.\n\
253 2 means C is the second character of such a sequence.\n\
254 3 means C is the start of a two-char comment end sequence.\n\
255 4 means C is the second character of such a sequence.\n\
e5d4f4dc
RS
256\n\
257There can be up to two orthogonal comment sequences. This is to support\n\
258language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
259a, but you can set the comment sequence style to b (on the second character\n\
260of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc
RS
261this flag:\n\
262 b means C is part of comment sequence b.\n\
263\n\
264 p means C is a prefix character for `backward-prefix-chars';\n\
265 such characters are treated as whitespace when they occur\n\
8489eb67 266 between expressions.")
2fcea188 267 (char, s, table)
8489eb67
RS
268*/
269
270DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
271 /* I really don't know why this is interactive
272 help-form should at least be made useful whilst reading the second arg
273 */
274 "cSet syntax for character: \nsSet syntax for %s to: ",
275 0 /* See immediately above */)
276 (c, newentry, syntax_table)
277 Lisp_Object c, newentry, syntax_table;
278{
279 register unsigned char *p, match;
280 register enum syntaxcode code;
281 Lisp_Object val;
282
283 CHECK_NUMBER (c, 0);
284 CHECK_STRING (newentry, 1);
265a9e55 285 if (NILP (syntax_table))
8489eb67
RS
286 syntax_table = current_buffer->syntax_table;
287 else
288 syntax_table = check_syntax_table (syntax_table);
289
290 p = XSTRING (newentry)->data;
291 code = (enum syntaxcode) syntax_spec_code[*p++];
292 if (((int) code & 0377) == 0377)
293 error ("invalid syntax description letter: %c", c);
294
295 match = *p;
296 if (match) p++;
297 if (match == ' ') match = 0;
298
299 XFASTINT (val) = (match << 8) + (int) code;
300 while (*p)
301 switch (*p++)
302 {
303 case '1':
304 XFASTINT (val) |= 1 << 16;
305 break;
306
307 case '2':
308 XFASTINT (val) |= 1 << 17;
309 break;
310
311 case '3':
312 XFASTINT (val) |= 1 << 18;
313 break;
314
315 case '4':
316 XFASTINT (val) |= 1 << 19;
317 break;
318
319 case 'p':
320 XFASTINT (val) |= 1 << 20;
321 break;
e5d4f4dc
RS
322
323 case 'b':
324 XFASTINT (val) |= 1 << 21;
325 break;
8489eb67
RS
326 }
327
328 XVECTOR (syntax_table)->contents[0xFF & XINT (c)] = val;
329
330 return Qnil;
331}
332\f
333/* Dump syntax table to buffer in human-readable format */
334
340f92b5 335static void
8489eb67
RS
336describe_syntax (value)
337 Lisp_Object value;
338{
339 register enum syntaxcode code;
e5d4f4dc 340 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67
RS
341 char str[2];
342
343 Findent_to (make_number (16), make_number (1));
344
345 if (XTYPE (value) != Lisp_Int)
346 {
347 insert_string ("invalid");
348 return;
349 }
350
351 code = (enum syntaxcode) (XINT (value) & 0377);
352 match = (XINT (value) >> 8) & 0377;
353 start1 = (XINT (value) >> 16) & 1;
354 start2 = (XINT (value) >> 17) & 1;
355 end1 = (XINT (value) >> 18) & 1;
356 end2 = (XINT (value) >> 19) & 1;
357 prefix = (XINT (value) >> 20) & 1;
e5d4f4dc 358 comstyle = (XINT (value) >> 21) & 1;
8489eb67
RS
359
360 if ((int) code < 0 || (int) code >= (int) Smax)
361 {
362 insert_string ("invalid");
363 return;
364 }
365 desc = syntax_code_spec[(int) code];
366
367 str[0] = desc, str[1] = 0;
368 insert (str, 1);
369
370 str[0] = match ? match : ' ';
371 insert (str, 1);
372
373
374 if (start1)
375 insert ("1", 1);
376 if (start2)
377 insert ("2", 1);
378
379 if (end1)
380 insert ("3", 1);
381 if (end2)
382 insert ("4", 1);
383
384 if (prefix)
385 insert ("p", 1);
e5d4f4dc
RS
386 if (comstyle)
387 insert ("b", 1);
8489eb67
RS
388
389 insert_string ("\twhich means: ");
390
391#ifdef SWITCH_ENUM_BUG
392 switch ((int) code)
393#else
394 switch (code)
395#endif
396 {
397 case Swhitespace:
398 insert_string ("whitespace"); break;
399 case Spunct:
400 insert_string ("punctuation"); break;
401 case Sword:
402 insert_string ("word"); break;
403 case Ssymbol:
404 insert_string ("symbol"); break;
405 case Sopen:
406 insert_string ("open"); break;
407 case Sclose:
408 insert_string ("close"); break;
409 case Squote:
410 insert_string ("quote"); break;
411 case Sstring:
412 insert_string ("string"); break;
413 case Smath:
414 insert_string ("math"); break;
415 case Sescape:
416 insert_string ("escape"); break;
417 case Scharquote:
418 insert_string ("charquote"); break;
419 case Scomment:
420 insert_string ("comment"); break;
421 case Sendcomment:
422 insert_string ("endcomment"); break;
6cb71bf6
RS
423 case Sinherit:
424 insert_string ("inherit"); break;
8489eb67
RS
425 default:
426 insert_string ("invalid");
427 return;
428 }
429
430 if (match)
431 {
432 insert_string (", matches ");
410e16ff 433 insert_char (match);
8489eb67
RS
434 }
435
436 if (start1)
437 insert_string (",\n\t is the first character of a comment-start sequence");
438 if (start2)
439 insert_string (",\n\t is the second character of a comment-start sequence");
440
441 if (end1)
442 insert_string (",\n\t is the first character of a comment-end sequence");
443 if (end2)
444 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
445 if (comstyle)
446 insert_string (" (comment style b)");
447
8489eb67
RS
448 if (prefix)
449 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
450
451 insert_string ("\n");
452}
453
340f92b5 454static Lisp_Object
8489eb67
RS
455describe_syntax_1 (vector)
456 Lisp_Object vector;
457{
458 struct buffer *old = current_buffer;
459 set_buffer_internal (XBUFFER (Vstandard_output));
b3cfe0c8 460 describe_vector (vector, Qnil, describe_syntax, 0, Qnil);
8489eb67
RS
461 set_buffer_internal (old);
462 return Qnil;
463}
464
465DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
466 "Describe the syntax specifications in the syntax table.\n\
467The descriptions are inserted in a buffer, which is then displayed.")
468 ()
469{
470 internal_with_output_to_temp_buffer
471 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
472
473 return Qnil;
474}
475\f
476/* Return the position across COUNT words from FROM.
477 If that many words cannot be found before the end of the buffer, return 0.
478 COUNT negative means scan backward and stop at word beginning. */
479
480scan_words (from, count)
481 register int from, count;
482{
483 register int beg = BEGV;
484 register int end = ZV;
485 register int code;
486
487 immediate_quit = 1;
488 QUIT;
489
490 while (count > 0)
491 {
492 while (1)
493 {
494 if (from == end)
495 {
496 immediate_quit = 0;
497 return 0;
498 }
499 code = SYNTAX (FETCH_CHAR (from));
500 if (words_include_escapes
501 && (code == Sescape || code == Scharquote))
502 break;
503 if (code == Sword)
504 break;
505 from++;
506 }
507 while (1)
508 {
509 if (from == end) break;
510 code = SYNTAX (FETCH_CHAR (from));
511 if (!(words_include_escapes
512 && (code == Sescape || code == Scharquote)))
513 if (code != Sword)
514 break;
515 from++;
516 }
517 count--;
518 }
519 while (count < 0)
520 {
521 while (1)
522 {
523 if (from == beg)
524 {
525 immediate_quit = 0;
526 return 0;
527 }
528 code = SYNTAX (FETCH_CHAR (from - 1));
529 if (words_include_escapes
530 && (code == Sescape || code == Scharquote))
531 break;
532 if (code == Sword)
533 break;
534 from--;
535 }
536 while (1)
537 {
538 if (from == beg) break;
539 code = SYNTAX (FETCH_CHAR (from - 1));
540 if (!(words_include_escapes
541 && (code == Sescape || code == Scharquote)))
542 if (code != Sword)
543 break;
544 from--;
545 }
546 count++;
547 }
548
549 immediate_quit = 0;
550
551 return from;
552}
553
554DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
555 "Move point forward ARG words (backward if ARG is negative).\n\
556Normally returns t.\n\
557If an edge of the buffer is reached, point is left there\n\
558and nil is returned.")
559 (count)
560 Lisp_Object count;
561{
562 int val;
563 CHECK_NUMBER (count, 0);
564
565 if (!(val = scan_words (point, XINT (count))))
566 {
567 SET_PT (XINT (count) > 0 ? ZV : BEGV);
568 return Qnil;
569 }
570 SET_PT (val);
571 return Qt;
572}
573\f
b3cfe0c8
RS
574DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
575 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 576Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 577Set point to where scanning stops.\n\
b3cfe0c8
RS
578If N comments are found as expected, with nothing except whitespace\n\
579between them, return t; otherwise return nil.")
580 (count)
840f481c 581 Lisp_Object count;
b3cfe0c8
RS
582{
583 register int from;
584 register int stop;
585 register int c;
586 register enum syntaxcode code;
587 int comstyle = 0; /* style of comment encountered */
be720845 588 int found;
840f481c
RS
589 int count1;
590
591 CHECK_NUMBER (count, 0);
592 count1 = XINT (count);
b3cfe0c8
RS
593
594 immediate_quit = 1;
595 QUIT;
596
597 from = PT;
598
840f481c 599 while (count1 > 0)
b3cfe0c8
RS
600 {
601 stop = ZV;
04882296 602 do
b3cfe0c8 603 {
04882296
KH
604 if (from == stop)
605 {
606 SET_PT (from);
607 return Qnil;
608 }
b3cfe0c8
RS
609 c = FETCH_CHAR (from);
610 code = SYNTAX (c);
611 from++;
612 comstyle = 0;
613 if (from < stop && SYNTAX_COMSTART_FIRST (c)
614 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
615 {
7fc8191e
RS
616 /* We have encountered a comment start sequence and we
617 are ignoring all text inside comments. We must record
b3cfe0c8
RS
618 the comment style this sequence begins so that later,
619 only a comment end of the same style actually ends
7fc8191e 620 the comment section. */
b3cfe0c8
RS
621 code = Scomment;
622 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
623 from++;
624 }
04882296
KH
625 }
626 while (code == Swhitespace || code == Sendcomment);
627 if (code != Scomment)
628 {
629 immediate_quit = 0;
630 SET_PT (from - 1);
631 return Qnil;
632 }
633 /* We're at the start of a comment. */
634 while (1)
635 {
636 if (from == stop)
b3cfe0c8
RS
637 {
638 immediate_quit = 0;
04882296 639 SET_PT (from);
b3cfe0c8
RS
640 return Qnil;
641 }
04882296
KH
642 c = FETCH_CHAR (from);
643 if (SYNTAX (c) == Sendcomment
644 && SYNTAX_COMMENT_STYLE (c) == comstyle)
645 /* we have encountered a comment end of the same style
646 as the comment sequence which began this comment
647 section */
648 break;
649 from++;
650 if (from < stop && SYNTAX_COMEND_FIRST (c)
651 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
652 && SYNTAX_COMMENT_STYLE (c) == comstyle)
653 /* we have encountered a comment end of the same style
654 as the comment sequence which began this comment
655 section */
656 { from++; break; }
b3cfe0c8 657 }
04882296 658 /* We have skipped one comment. */
840f481c 659 count1--;
b3cfe0c8
RS
660 }
661
840f481c 662 while (count1 < 0)
b3cfe0c8
RS
663 {
664 stop = BEGV;
665 while (from > stop)
666 {
667 int quoted;
668
669 from--;
670 quoted = char_quoted (from);
671 if (quoted)
672 from--;
673 c = FETCH_CHAR (from);
674 code = SYNTAX (c);
675 comstyle = 0;
7fc8191e
RS
676 if (code == Sendcomment)
677 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8
RS
678 if (from > stop && SYNTAX_COMEND_SECOND (c)
679 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
680 && !char_quoted (from - 1))
681 {
7fc8191e 682 /* We must record the comment style encountered so that
b3cfe0c8 683 later, we can match only the proper comment begin
7fc8191e 684 sequence of the same style. */
b3cfe0c8
RS
685 code = Sendcomment;
686 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
687 from--;
688 }
689
690 if (code == Sendcomment && !quoted)
691 {
8f9dc2ed 692#if 0
b3cfe0c8
RS
693 if (code != SYNTAX (c))
694 /* For a two-char comment ender, we can assume
695 it does end a comment. So scan back in a simple way. */
696 {
697 if (from != stop) from--;
698 while (1)
699 {
700 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
701 && SYNTAX_COMMENT_STYLE (c) == comstyle)
702 break;
703 if (from == stop)
704 {
705 immediate_quit = 0;
c81a3712 706 SET_PT (from);
b3cfe0c8
RS
707 return Qnil;
708 }
709 from--;
710 if (SYNTAX_COMSTART_SECOND (c)
711 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
712 && SYNTAX_COMMENT_STYLE (c) == comstyle
713 && !char_quoted (from))
714 break;
715 }
716 break;
717 }
8f9dc2ed 718#endif /* 0 */
b3cfe0c8
RS
719
720 /* Look back, counting the parity of string-quotes,
721 and recording the comment-starters seen.
722 When we reach a safe place, assume that's not in a string;
723 then step the main scan to the earliest comment-starter seen
724 an even number of string quotes away from the safe place.
725
726 OFROM[I] is position of the earliest comment-starter seen
727 which is I+2X quotes from the comment-end.
728 PARITY is current parity of quotes from the comment end. */
729 {
730 int parity = 0;
731 char my_stringend = 0;
732 int string_lossage = 0;
733 int comment_end = from;
734 int comstart_pos = 0;
735 int comstart_parity = 0;
736
737 /* At beginning of range to scan, we're outside of strings;
738 that determines quote parity to the comment-end. */
739 while (from != stop)
740 {
741 /* Move back and examine a character. */
742 from--;
743
744 c = FETCH_CHAR (from);
745 code = SYNTAX (c);
746
747 /* If this char is the second of a 2-char comment sequence,
748 back up and give the pair the appropriate syntax. */
749 if (from > stop && SYNTAX_COMEND_SECOND (c)
750 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
751 {
752 code = Sendcomment;
753 from--;
754 }
755
756 else if (from > stop && SYNTAX_COMSTART_SECOND (c)
757 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
758 && comstyle == SYNTAX_COMMENT_STYLE (c))
759 {
760 code = Scomment;
761 from--;
762 }
763
764 /* Ignore escaped characters. */
765 if (char_quoted (from))
766 continue;
767
768 /* Track parity of quotes. */
769 if (code == Sstring)
770 {
771 parity ^= 1;
772 if (my_stringend == 0)
773 my_stringend = c;
774 /* If we have two kinds of string delimiters.
775 There's no way to grok this scanning backwards. */
776 else if (my_stringend != c)
777 string_lossage = 1;
778 }
779
780 /* Record comment-starters according to that
781 quote-parity to the comment-end. */
782 if (code == Scomment)
783 {
784 comstart_parity = parity;
785 comstart_pos = from;
786 }
787
788 /* If we find another earlier comment-ender,
eb8c3be9 789 any comment-starts earlier than that don't count
b3cfe0c8
RS
790 (because they go with the earlier comment-ender). */
791 if (code == Sendcomment
792 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
793 break;
794
795 /* Assume a defun-start point is outside of strings. */
796 if (code == Sopen
797 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
798 break;
799 }
800
801 if (comstart_pos == 0)
802 from = comment_end;
803 /* If the earliest comment starter
804 is followed by uniform paired string quotes or none,
805 we know it can't be inside a string
806 since if it were then the comment ender would be inside one.
807 So it does start a comment. Skip back to it. */
808 else if (comstart_parity == 0 && !string_lossage)
809 from = comstart_pos;
810 else
811 {
812 /* We had two kinds of string delimiters mixed up
813 together. Decode this going forwards.
814 Scan fwd from the previous comment ender
815 to the one in question; this records where we
816 last passed a comment starter. */
817 struct lisp_parse_state state;
818 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 819 comment_end - 1, -10000, 0, Qnil, 0);
b3cfe0c8
RS
820 if (state.incomment)
821 from = state.comstart;
822 else
823 /* We can't grok this as a comment; scan it normally. */
824 from = comment_end;
825 }
826 }
7fc8191e
RS
827 /* We have skipped one comment. */
828 break;
b3cfe0c8 829 }
c81a3712 830 else if ((code != Swhitespace && code != Scomment) || quoted)
b3cfe0c8
RS
831 {
832 immediate_quit = 0;
c81a3712 833 SET_PT (from + 1);
b3cfe0c8
RS
834 return Qnil;
835 }
836 }
837
840f481c 838 count1++;
b3cfe0c8
RS
839 }
840
841 SET_PT (from);
842 immediate_quit = 0;
843 return Qt;
844}
845\f
8489eb67
RS
846int parse_sexp_ignore_comments;
847
848Lisp_Object
849scan_lists (from, count, depth, sexpflag)
850 register int from;
851 int count, depth, sexpflag;
852{
853 Lisp_Object val;
854 register int stop;
855 register int c;
856 char stringterm;
857 int quoted;
858 int mathexit = 0;
859 register enum syntaxcode code;
860 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 861 int comstyle = 0; /* style of comment encountered */
8489eb67
RS
862
863 if (depth > 0) min_depth = 0;
864
865 immediate_quit = 1;
866 QUIT;
867
868 while (count > 0)
869 {
870 stop = ZV;
871 while (from < stop)
872 {
873 c = FETCH_CHAR (from);
e5d4f4dc 874 code = SYNTAX (c);
8489eb67
RS
875 from++;
876 if (from < stop && SYNTAX_COMSTART_FIRST (c)
877 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
878 && parse_sexp_ignore_comments)
e5d4f4dc
RS
879 {
880 /* we have encountered a comment start sequence and we
881 are ignoring all text inside comments. we must record
882 the comment style this sequence begins so that later,
883 only a comment end of the same style actually ends
884 the comment section */
885 code = Scomment;
886 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
887 from++;
888 }
889
8489eb67
RS
890 if (SYNTAX_PREFIX (c))
891 continue;
892
893#ifdef SWITCH_ENUM_BUG
894 switch ((int) code)
895#else
896 switch (code)
897#endif
898 {
899 case Sescape:
900 case Scharquote:
901 if (from == stop) goto lose;
902 from++;
903 /* treat following character as a word constituent */
904 case Sword:
905 case Ssymbol:
906 if (depth || !sexpflag) break;
907 /* This word counts as a sexp; return at end of it. */
908 while (from < stop)
909 {
910#ifdef SWITCH_ENUM_BUG
e5d4f4dc 911 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 912#else
e5d4f4dc 913 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
914#endif
915 {
916 case Scharquote:
917 case Sescape:
918 from++;
919 if (from == stop) goto lose;
920 break;
921 case Sword:
922 case Ssymbol:
923 case Squote:
924 break;
925 default:
926 goto done;
927 }
928 from++;
929 }
930 goto done;
931
932 case Scomment:
933 if (!parse_sexp_ignore_comments) break;
934 while (1)
935 {
936 if (from == stop) goto done;
e5d4f4dc
RS
937 c = FETCH_CHAR (from);
938 if (SYNTAX (c) == Sendcomment
939 && SYNTAX_COMMENT_STYLE (c) == comstyle)
940 /* we have encountered a comment end of the same style
941 as the comment sequence which began this comment
942 section */
8489eb67
RS
943 break;
944 from++;
945 if (from < stop && SYNTAX_COMEND_FIRST (c)
e5d4f4dc
RS
946 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
947 && SYNTAX_COMMENT_STYLE (c) == comstyle)
948 /* we have encountered a comment end of the same style
949 as the comment sequence which began this comment
950 section */
8489eb67
RS
951 { from++; break; }
952 }
953 break;
954
955 case Smath:
956 if (!sexpflag)
957 break;
958 if (from != stop && c == FETCH_CHAR (from))
959 from++;
960 if (mathexit)
961 {
962 mathexit = 0;
963 goto close1;
964 }
965 mathexit = 1;
966
967 case Sopen:
968 if (!++depth) goto done;
969 break;
970
971 case Sclose:
972 close1:
973 if (!--depth) goto done;
974 if (depth < min_depth)
975 error ("Containing expression ends prematurely");
976 break;
977
978 case Sstring:
979 stringterm = FETCH_CHAR (from - 1);
980 while (1)
981 {
982 if (from >= stop) goto lose;
983 if (FETCH_CHAR (from) == stringterm) break;
984#ifdef SWITCH_ENUM_BUG
e5d4f4dc 985 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 986#else
e5d4f4dc 987 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
988#endif
989 {
990 case Scharquote:
991 case Sescape:
992 from++;
993 }
994 from++;
995 }
996 from++;
997 if (!depth && sexpflag) goto done;
998 break;
999 }
1000 }
1001
1002 /* Reached end of buffer. Error if within object, return nil if between */
1003 if (depth) goto lose;
1004
1005 immediate_quit = 0;
1006 return Qnil;
1007
1008 /* End of object reached */
1009 done:
1010 count--;
1011 }
1012
1013 while (count < 0)
1014 {
1015 stop = BEGV;
1016 while (from > stop)
1017 {
1018 from--;
1019 if (quoted = char_quoted (from))
1020 from--;
1021 c = FETCH_CHAR (from);
1022 code = SYNTAX (c);
7fc8191e
RS
1023 comstyle = 0;
1024 if (code == Sendcomment)
1025 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67
RS
1026 if (from > stop && SYNTAX_COMEND_SECOND (c)
1027 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1028 && !char_quoted (from - 1)
1029 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1030 {
1031 /* we must record the comment style encountered so that
1032 later, we can match only the proper comment begin
1033 sequence of the same style */
1034 code = Sendcomment;
1035 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1036 from--;
1037 }
1038
8489eb67
RS
1039 if (SYNTAX_PREFIX (c))
1040 continue;
1041
1042#ifdef SWITCH_ENUM_BUG
1043 switch ((int) (quoted ? Sword : code))
1044#else
1045 switch (quoted ? Sword : code)
1046#endif
1047 {
1048 case Sword:
1049 case Ssymbol:
1050 if (depth || !sexpflag) break;
1051 /* This word counts as a sexp; count object finished after passing it. */
1052 while (from > stop)
1053 {
1054 quoted = char_quoted (from - 1);
1055 if (quoted)
1056 from--;
e5d4f4dc
RS
1057 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1058 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1059 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
8489eb67
RS
1060 goto done2;
1061 from--;
1062 }
1063 goto done2;
1064
1065 case Smath:
1066 if (!sexpflag)
1067 break;
1068 if (from != stop && c == FETCH_CHAR (from - 1))
1069 from--;
1070 if (mathexit)
1071 {
1072 mathexit = 0;
1073 goto open2;
1074 }
1075 mathexit = 1;
1076
1077 case Sclose:
1078 if (!++depth) goto done2;
1079 break;
1080
1081 case Sopen:
1082 open2:
1083 if (!--depth) goto done2;
1084 if (depth < min_depth)
1085 error ("Containing expression ends prematurely");
1086 break;
1087
1088 case Sendcomment:
1089 if (!parse_sexp_ignore_comments)
1090 break;
8f9dc2ed 1091#if 0
37bef230
RS
1092 if (code != SYNTAX (c))
1093 /* For a two-char comment ender, we can assume
1094 it does end a comment. So scan back in a simple way. */
1095 {
1096 if (from != stop) from--;
1097 while (1)
1098 {
1099 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1100 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1101 break;
1102 if (from == stop) goto done;
1103 from--;
1104 if (SYNTAX_COMSTART_SECOND (c)
1105 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1106 && SYNTAX_COMMENT_STYLE (c) == comstyle
1107 && !char_quoted (from))
1108 break;
1109 }
1110 break;
1111 }
8f9dc2ed 1112#endif /* 0 */
37bef230 1113
8489eb67
RS
1114 /* Look back, counting the parity of string-quotes,
1115 and recording the comment-starters seen.
1116 When we reach a safe place, assume that's not in a string;
1117 then step the main scan to the earliest comment-starter seen
1118 an even number of string quotes away from the safe place.
1119
1120 OFROM[I] is position of the earliest comment-starter seen
1121 which is I+2X quotes from the comment-end.
1122 PARITY is current parity of quotes from the comment end. */
1123 {
8489eb67 1124 int parity = 0;
e5d4f4dc
RS
1125 char my_stringend = 0;
1126 int string_lossage = 0;
1127 int comment_end = from;
37bef230
RS
1128 int comstart_pos = 0;
1129 int comstart_parity = 0;
8489eb67
RS
1130
1131 /* At beginning of range to scan, we're outside of strings;
1132 that determines quote parity to the comment-end. */
1133 while (from != stop)
1134 {
1135 /* Move back and examine a character. */
1136 from--;
1137
1138 c = FETCH_CHAR (from);
1139 code = SYNTAX (c);
1140
1141 /* If this char is the second of a 2-char comment sequence,
1142 back up and give the pair the appropriate syntax. */
1143 if (from > stop && SYNTAX_COMEND_SECOND (c)
1144 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
e5d4f4dc
RS
1145 {
1146 code = Sendcomment;
1147 from--;
1148 }
1149
8489eb67 1150 else if (from > stop && SYNTAX_COMSTART_SECOND (c)
e5d4f4dc
RS
1151 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1152 && comstyle == SYNTAX_COMMENT_STYLE (c))
1153 {
1154 code = Scomment;
1155 from--;
1156 }
8489eb67
RS
1157
1158 /* Ignore escaped characters. */
1159 if (char_quoted (from))
1160 continue;
1161
37bef230 1162 /* Track parity of quotes. */
8489eb67 1163 if (code == Sstring)
e5d4f4dc
RS
1164 {
1165 parity ^= 1;
1166 if (my_stringend == 0)
1167 my_stringend = c;
37bef230 1168 /* If we have two kinds of string delimiters.
e5d4f4dc
RS
1169 There's no way to grok this scanning backwards. */
1170 else if (my_stringend != c)
1171 string_lossage = 1;
1172 }
8489eb67
RS
1173
1174 /* Record comment-starters according to that
1175 quote-parity to the comment-end. */
1176 if (code == Scomment)
37bef230
RS
1177 {
1178 comstart_parity = parity;
1179 comstart_pos = from;
1180 }
8489eb67 1181
37bef230 1182 /* If we find another earlier comment-ender,
eb8c3be9 1183 any comment-starts earlier than that don't count
37bef230 1184 (because they go with the earlier comment-ender). */
e5d4f4dc
RS
1185 if (code == Sendcomment
1186 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
8489eb67 1187 break;
37bef230
RS
1188
1189 /* Assume a defun-start point is outside of strings. */
1190 if (code == Sopen
1191 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1192 break;
8489eb67 1193 }
37bef230
RS
1194
1195 if (comstart_pos == 0)
1196 from = comment_end;
1197 /* If the earliest comment starter
1198 is followed by uniform paired string quotes or none,
1199 we know it can't be inside a string
1200 since if it were then the comment ender would be inside one.
1201 So it does start a comment. Skip back to it. */
1202 else if (comstart_parity == 0 && !string_lossage)
1203 from = comstart_pos;
1204 else
e5d4f4dc
RS
1205 {
1206 /* We had two kinds of string delimiters mixed up
1207 together. Decode this going forwards.
1208 Scan fwd from the previous comment ender
1209 to the one in question; this records where we
1210 last passed a comment starter. */
1211 struct lisp_parse_state state;
37bef230 1212 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 1213 comment_end - 1, -10000, 0, Qnil, 0);
e5d4f4dc
RS
1214 if (state.incomment)
1215 from = state.comstart;
1216 else
1217 /* We can't grok this as a comment; scan it normally. */
1218 from = comment_end;
1219 }
8489eb67
RS
1220 }
1221 break;
1222
1223 case Sstring:
1224 stringterm = FETCH_CHAR (from);
1225 while (1)
1226 {
1227 if (from == stop) goto lose;
1228 if (!char_quoted (from - 1)
1229 && stringterm == FETCH_CHAR (from - 1))
1230 break;
1231 from--;
1232 }
1233 from--;
1234 if (!depth && sexpflag) goto done2;
1235 break;
1236 }
1237 }
1238
1239 /* Reached start of buffer. Error if within object, return nil if between */
1240 if (depth) goto lose;
1241
1242 immediate_quit = 0;
1243 return Qnil;
1244
1245 done2:
1246 count++;
1247 }
1248
1249
1250 immediate_quit = 0;
1251 XFASTINT (val) = from;
1252 return val;
1253
1254 lose:
1255 error ("Unbalanced parentheses");
1256 /* NOTREACHED */
1257}
1258
340f92b5 1259static int
8489eb67
RS
1260char_quoted (pos)
1261 register int pos;
1262{
1263 register enum syntaxcode code;
1264 register int beg = BEGV;
1265 register int quoted = 0;
1266
1267 while (pos > beg
1268 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1269 || code == Sescape))
1270 pos--, quoted = !quoted;
1271 return quoted;
1272}
1273
1274DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1275 "Scan from character number FROM by COUNT lists.\n\
1276Returns the character number of the position thus found.\n\
1277\n\
1278If DEPTH is nonzero, paren depth begins counting from that value,\n\
1279only places where the depth in parentheses becomes zero\n\
1280are candidates for stopping; COUNT such places are counted.\n\
1281Thus, a positive value for DEPTH means go out levels.\n\
1282\n\
1283Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1284\n\
1285If the beginning or end of (the accessible part of) the buffer is reached\n\
1286and the depth is wrong, an error is signaled.\n\
1287If the depth is right but the count is not used up, nil is returned.")
1288 (from, count, depth)
1289 Lisp_Object from, count, depth;
1290{
1291 CHECK_NUMBER (from, 0);
1292 CHECK_NUMBER (count, 1);
1293 CHECK_NUMBER (depth, 2);
1294
1295 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1296}
1297
1298DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1299 "Scan from character number FROM by COUNT balanced expressions.\n\
1300If COUNT is negative, scan backwards.\n\
1301Returns the character number of the position thus found.\n\
1302\n\
1303Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1304\n\
1305If the beginning or end of (the accessible part of) the buffer is reached\n\
1306in the middle of a parenthetical grouping, an error is signaled.\n\
1307If the beginning or end is reached between groupings\n\
1308but before count is used up, nil is returned.")
1309 (from, count)
1310 Lisp_Object from, count;
1311{
1312 CHECK_NUMBER (from, 0);
1313 CHECK_NUMBER (count, 1);
1314
1315 return scan_lists (XINT (from), XINT (count), 0, 1);
1316}
1317
1318DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1319 0, 0, 0,
1320 "Move point backward over any number of chars with prefix syntax.\n\
1321This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1322 ()
1323{
1324 int beg = BEGV;
1325 int pos = point;
1326
1327 while (pos > beg && !char_quoted (pos - 1)
1328 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1329 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1330 pos--;
1331
1332 SET_PT (pos);
1333
1334 return Qnil;
1335}
1336\f
8489eb67 1337/* Parse forward from FROM to END,
e5d4f4dc
RS
1338 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1339 and return a description of the state of the parse at END.
c81a3712
RS
1340 If STOPBEFORE is nonzero, stop at the start of an atom.
1341 If COMMENTSTOP is nonzero, stop at the start of a comment. */
8489eb67 1342
340f92b5 1343static void
c81a3712
RS
1344scan_sexps_forward (stateptr, from, end, targetdepth,
1345 stopbefore, oldstate, commentstop)
e5d4f4dc 1346 struct lisp_parse_state *stateptr;
8489eb67
RS
1347 register int from;
1348 int end, targetdepth, stopbefore;
1349 Lisp_Object oldstate;
c81a3712 1350 int commentstop;
8489eb67
RS
1351{
1352 struct lisp_parse_state state;
1353
1354 register enum syntaxcode code;
1355 struct level { int last, prev; };
1356 struct level levelstart[100];
1357 register struct level *curlevel = levelstart;
1358 struct level *endlevel = levelstart + 100;
1359 char prev;
1360 register int depth; /* Paren depth of current scanning location.
1361 level - levelstart equals this except
1362 when the depth becomes negative. */
1363 int mindepth; /* Lowest DEPTH value seen. */
1364 int start_quoted = 0; /* Nonzero means starting after a char quote */
1365 Lisp_Object tem;
1366
1367 immediate_quit = 1;
1368 QUIT;
1369
265a9e55 1370 if (NILP (oldstate))
8489eb67
RS
1371 {
1372 depth = 0;
1373 state.instring = -1;
1374 state.incomment = 0;
e5d4f4dc 1375 state.comstyle = 0; /* comment style a by default */
8489eb67
RS
1376 }
1377 else
1378 {
1379 tem = Fcar (oldstate);
265a9e55 1380 if (!NILP (tem))
8489eb67
RS
1381 depth = XINT (tem);
1382 else
1383 depth = 0;
1384
1385 oldstate = Fcdr (oldstate);
1386 oldstate = Fcdr (oldstate);
1387 oldstate = Fcdr (oldstate);
1388 tem = Fcar (oldstate);
265a9e55 1389 state.instring = !NILP (tem) ? XINT (tem) : -1;
8489eb67
RS
1390
1391 oldstate = Fcdr (oldstate);
1392 tem = Fcar (oldstate);
265a9e55 1393 state.incomment = !NILP (tem);
8489eb67
RS
1394
1395 oldstate = Fcdr (oldstate);
1396 tem = Fcar (oldstate);
265a9e55 1397 start_quoted = !NILP (tem);
e5d4f4dc
RS
1398
1399 /* if the eight element of the list is nil, we are in comment
1400 style a. if it is non-nil, we are in comment style b */
1401 oldstate = Fcdr (oldstate);
1402 oldstate = Fcdr (oldstate);
e5d4f4dc
RS
1403 tem = Fcar (oldstate);
1404 state.comstyle = !NILP (tem);
8489eb67
RS
1405 }
1406 state.quoted = 0;
1407 mindepth = depth;
1408
1409 curlevel->prev = -1;
1410 curlevel->last = -1;
1411
1412 /* Enter the loop at a place appropriate for initial state. */
1413
1414 if (state.incomment) goto startincomment;
1415 if (state.instring >= 0)
1416 {
1417 if (start_quoted) goto startquotedinstring;
1418 goto startinstring;
1419 }
1420 if (start_quoted) goto startquoted;
1421
1422 while (from < end)
1423 {
e5d4f4dc 1424 code = SYNTAX (FETCH_CHAR (from));
8489eb67 1425 from++;
8f9dc2ed
RS
1426 if (code == Scomment)
1427 state.comstart = from-1;
1428
1429 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1430 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
e5d4f4dc
RS
1431 {
1432 /* Record the comment style we have entered so that only
1433 the comment-end sequence of the same style actually
1434 terminates the comment section. */
1435 code = Scomment;
1436 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
8f9dc2ed 1437 state.comstart = from-1;
e5d4f4dc
RS
1438 from++;
1439 }
1440
8489eb67
RS
1441 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1442 continue;
1443#ifdef SWITCH_ENUM_BUG
1444 switch ((int) code)
1445#else
1446 switch (code)
1447#endif
1448 {
1449 case Sescape:
1450 case Scharquote:
1451 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1452 curlevel->last = from - 1;
1453 startquoted:
1454 if (from == end) goto endquoted;
1455 from++;
1456 goto symstarted;
1457 /* treat following character as a word constituent */
1458 case Sword:
1459 case Ssymbol:
1460 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1461 curlevel->last = from - 1;
1462 symstarted:
1463 while (from < end)
1464 {
1465#ifdef SWITCH_ENUM_BUG
e5d4f4dc 1466 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 1467#else
e5d4f4dc 1468 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
1469#endif
1470 {
1471 case Scharquote:
1472 case Sescape:
1473 from++;
1474 if (from == end) goto endquoted;
1475 break;
1476 case Sword:
1477 case Ssymbol:
1478 case Squote:
1479 break;
1480 default:
1481 goto symdone;
1482 }
1483 from++;
1484 }
1485 symdone:
1486 curlevel->prev = curlevel->last;
1487 break;
1488
1489 case Scomment:
1490 state.incomment = 1;
1491 startincomment:
c81a3712
RS
1492 if (commentstop)
1493 goto done;
8489eb67
RS
1494 while (1)
1495 {
1496 if (from == end) goto done;
e5d4f4dc
RS
1497 prev = FETCH_CHAR (from);
1498 if (SYNTAX (prev) == Sendcomment
1499 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1500 /* Only terminate the comment section if the endcomment
1501 of the same style as the start sequence has been
1502 encountered. */
8489eb67
RS
1503 break;
1504 from++;
1505 if (from < end && SYNTAX_COMEND_FIRST (prev)
e5d4f4dc
RS
1506 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1507 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1508 /* Only terminate the comment section if the end-comment
1509 sequence of the same style as the start sequence has
1510 been encountered. */
8489eb67
RS
1511 { from++; break; }
1512 }
1513 state.incomment = 0;
e5d4f4dc 1514 state.comstyle = 0; /* reset the comment style */
8489eb67
RS
1515 break;
1516
1517 case Sopen:
1518 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1519 depth++;
1520 /* curlevel++->last ran into compiler bug on Apollo */
1521 curlevel->last = from - 1;
1522 if (++curlevel == endlevel)
1523 error ("Nesting too deep for parser");
1524 curlevel->prev = -1;
1525 curlevel->last = -1;
1526 if (!--targetdepth) goto done;
1527 break;
1528
1529 case Sclose:
1530 depth--;
1531 if (depth < mindepth)
1532 mindepth = depth;
1533 if (curlevel != levelstart)
1534 curlevel--;
1535 curlevel->prev = curlevel->last;
1536 if (!++targetdepth) goto done;
1537 break;
1538
1539 case Sstring:
1540 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1541 curlevel->last = from - 1;
1542 state.instring = FETCH_CHAR (from - 1);
1543 startinstring:
1544 while (1)
1545 {
1546 if (from >= end) goto done;
1547 if (FETCH_CHAR (from) == state.instring) break;
1548#ifdef SWITCH_ENUM_BUG
e5d4f4dc 1549 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 1550#else
e5d4f4dc 1551 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
1552#endif
1553 {
1554 case Scharquote:
1555 case Sescape:
1556 from++;
1557 startquotedinstring:
1558 if (from >= end) goto endquoted;
1559 }
1560 from++;
1561 }
1562 state.instring = -1;
1563 curlevel->prev = curlevel->last;
1564 from++;
1565 break;
1566
1567 case Smath:
1568 break;
1569 }
1570 }
1571 goto done;
1572
1573 stop: /* Here if stopping before start of sexp. */
1574 from--; /* We have just fetched the char that starts it; */
1575 goto done; /* but return the position before it. */
1576
1577 endquoted:
1578 state.quoted = 1;
1579 done:
1580 state.depth = depth;
1581 state.mindepth = mindepth;
1582 state.thislevelstart = curlevel->prev;
1583 state.prevlevelstart
1584 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1585 state.location = from;
1586 immediate_quit = 0;
1587
e5d4f4dc 1588 *stateptr = state;
8489eb67
RS
1589}
1590
1591/* This comment supplies the doc string for parse-partial-sexp,
1592 for make-docfile to see. We cannot put this in the real DEFUN
1593 due to limits in the Unix cpp.
1594
c81a3712 1595DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
1596 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1597Parsing stops at TO or when certain criteria are met;\n\
1598 point is set to where parsing stops.\n\
1599If fifth arg STATE is omitted or nil,\n\
1600 parsing assumes that FROM is the beginning of a function.\n\
e5d4f4dc 1601Value is a list of eight elements describing final state of parsing:\n\
af50f9e5
RS
1602 0. depth in parens.\n\
1603 1. character address of start of innermost containing list; nil if none.\n\
1604 2. character address of start of last complete sexp terminated.\n\
1605 3. non-nil if inside a string.\n\
8489eb67 1606 (it is the character that will terminate the string.)\n\
af50f9e5
RS
1607 4. t if inside a comment.\n\
1608 5. t if following a quote character.\n\
1609 6. the minimum paren-depth encountered during this scan.\n\
1610 7. t if in a comment of style `b'.\n\
8489eb67
RS
1611If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1612in parentheses becomes equal to TARGETDEPTH.\n\
1613Fourth arg STOPBEFORE non-nil means stop when come to\n\
1614 any character that starts a sexp.\n\
ec11639d 1615Fifth arg STATE is an eight-list like what this function returns.\n\
a4275ad1 1616It is used to initialize the state of the parse. Its second and third
c81a3712
RS
1617elements are ignored.
1618Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1619 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
1620*/
1621
c81a3712 1622DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 1623 0 /* See immediately above */)
c81a3712
RS
1624 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1625 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
1626{
1627 struct lisp_parse_state state;
1628 int target;
1629
265a9e55 1630 if (!NILP (targetdepth))
8489eb67
RS
1631 {
1632 CHECK_NUMBER (targetdepth, 3);
1633 target = XINT (targetdepth);
1634 }
1635 else
1636 target = -100000; /* We won't reach this depth */
1637
1638 validate_region (&from, &to);
e5d4f4dc 1639 scan_sexps_forward (&state, XINT (from), XINT (to),
c81a3712
RS
1640 target, !NILP (stopbefore), oldstate,
1641 !NILP (commentstop));
8489eb67
RS
1642
1643 SET_PT (state.location);
1644
1645 return Fcons (make_number (state.depth),
1646 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1647 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1648 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1649 Fcons (state.incomment ? Qt : Qnil,
1650 Fcons (state.quoted ? Qt : Qnil,
e5d4f4dc
RS
1651 Fcons (make_number (state.mindepth),
1652 Fcons (state.comstyle ? Qt : Qnil,
1653 Qnil))))))));
8489eb67
RS
1654}
1655\f
1656init_syntax_once ()
1657{
1658 register int i;
1659 register struct Lisp_Vector *v;
1660
1661 /* Set this now, so first buffer creation can refer to it. */
1662 /* Make it nil before calling copy-syntax-table
1663 so that copy-syntax-table will know not to try to copy from garbage */
1664 Vstandard_syntax_table = Qnil;
1665 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1666
1667 v = XVECTOR (Vstandard_syntax_table);
1668
1669 for (i = 'a'; i <= 'z'; i++)
1670 XFASTINT (v->contents[i]) = (int) Sword;
1671 for (i = 'A'; i <= 'Z'; i++)
1672 XFASTINT (v->contents[i]) = (int) Sword;
1673 for (i = '0'; i <= '9'; i++)
1674 XFASTINT (v->contents[i]) = (int) Sword;
1675 XFASTINT (v->contents['$']) = (int) Sword;
1676 XFASTINT (v->contents['%']) = (int) Sword;
1677
1678 XFASTINT (v->contents['(']) = (int) Sopen + (')' << 8);
1679 XFASTINT (v->contents[')']) = (int) Sclose + ('(' << 8);
1680 XFASTINT (v->contents['[']) = (int) Sopen + (']' << 8);
1681 XFASTINT (v->contents[']']) = (int) Sclose + ('[' << 8);
1682 XFASTINT (v->contents['{']) = (int) Sopen + ('}' << 8);
1683 XFASTINT (v->contents['}']) = (int) Sclose + ('{' << 8);
1684 XFASTINT (v->contents['"']) = (int) Sstring;
1685 XFASTINT (v->contents['\\']) = (int) Sescape;
1686
1687 for (i = 0; i < 10; i++)
1688 XFASTINT (v->contents["_-+*/&|<>="[i]]) = (int) Ssymbol;
1689
1690 for (i = 0; i < 12; i++)
1691 XFASTINT (v->contents[".,;:?!#@~^'`"[i]]) = (int) Spunct;
1692}
1693
1694syms_of_syntax ()
1695{
1696 Qsyntax_table_p = intern ("syntax-table-p");
1697 staticpro (&Qsyntax_table_p);
1698
1699 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1700 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1701
1702 words_include_escapes = 0;
1703 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1704 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1705
1706 defsubr (&Ssyntax_table_p);
1707 defsubr (&Ssyntax_table);
1708 defsubr (&Sstandard_syntax_table);
1709 defsubr (&Scopy_syntax_table);
1710 defsubr (&Sset_syntax_table);
1711 defsubr (&Schar_syntax);
1712 defsubr (&Smodify_syntax_entry);
1713 defsubr (&Sdescribe_syntax);
1714
1715 defsubr (&Sforward_word);
1716
b3cfe0c8 1717 defsubr (&Sforward_comment);
8489eb67
RS
1718 defsubr (&Sscan_lists);
1719 defsubr (&Sscan_sexps);
1720 defsubr (&Sbackward_prefix_chars);
1721 defsubr (&Sparse_partial_sexp);
1722}