[!emacs] (SWITCH_ENUM_CAST): New macro, from emacs/lisp.h
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
3a22ee35 2 Copyright (C) 1985, 1987, 1993, 1994 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
18160b98 21#include <config.h>
8489eb67
RS
22#include <ctype.h>
23#include "lisp.h"
24#include "commands.h"
25#include "buffer.h"
26#include "syntax.h"
27
28Lisp_Object Qsyntax_table_p;
29
340f92b5
RS
30static void scan_sexps_forward ();
31static int char_quoted ();
32
8489eb67
RS
33int words_include_escapes;
34
e5d4f4dc
RS
35/* This is the internal form of the parse state used in parse-partial-sexp. */
36
37struct lisp_parse_state
38 {
39 int depth; /* Depth at end of parsing */
40 int instring; /* -1 if not within string, else desired terminator. */
41 int incomment; /* Nonzero if within a comment at end of parsing */
42 int comstyle; /* comment style a=0, or b=1 */
43 int quoted; /* Nonzero if just after an escape char at end of parsing */
44 int thislevelstart; /* Char number of most recent start-of-expression at current level */
45 int prevlevelstart; /* Char number of start of containing expression */
46 int location; /* Char number at which parsing stopped. */
47 int mindepth; /* Minimum depth seen while scanning. */
48 int comstart; /* Position just after last comment starter. */
49 };
50\f
37bef230
RS
51/* These variables are a cache for finding the start of a defun.
52 find_start_pos is the place for which the defun start was found.
53 find_start_value is the defun start position found for it.
54 find_start_buffer is the buffer it was found in.
55 find_start_begv is the BEGV value when it was found.
56 find_start_modiff is the value of MODIFF when it was found. */
57
58static int find_start_pos;
59static int find_start_value;
60static struct buffer *find_start_buffer;
61static int find_start_begv;
62static int find_start_modiff;
63
64/* Find a defun-start that is the last one before POS (or nearly the last).
65 We record what we find, so that another call in the same area
66 can return the same value right away. */
67
68static int
69find_defun_start (pos)
70 int pos;
71{
72 int tem;
73 int shortage;
74
75 /* Use previous finding, if it's valid and applies to this inquiry. */
76 if (current_buffer == find_start_buffer
77 /* Reuse the defun-start even if POS is a little farther on.
78 POS might be in the next defun, but that's ok.
79 Our value may not be the best possible, but will still be usable. */
80 && pos <= find_start_pos + 1000
81 && pos >= find_start_value
82 && BEGV == find_start_begv
83 && MODIFF == find_start_modiff)
84 return find_start_value;
85
86 /* Back up to start of line. */
be5357e2 87 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
37bef230
RS
88
89 while (tem > BEGV)
90 {
91 /* Open-paren at start of line means we found our defun-start. */
92 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
93 break;
94 /* Move to beg of previous line. */
be5357e2 95 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
37bef230
RS
96 }
97
98 /* Record what we found, for the next try. */
99 find_start_value = tem;
100 find_start_buffer = current_buffer;
101 find_start_modiff = MODIFF;
102 find_start_begv = BEGV;
103 find_start_pos = pos;
104
105 return find_start_value;
106}
107\f
8489eb67
RS
108DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
109 "Return t if ARG is a syntax table.\n\
110Any vector of 256 elements will do.")
111 (obj)
112 Lisp_Object obj;
113{
330cf232 114 if (VECTORP (obj) && XVECTOR (obj)->size == 0400)
8489eb67
RS
115 return Qt;
116 return Qnil;
117}
118
119Lisp_Object
120check_syntax_table (obj)
121 Lisp_Object obj;
122{
123 register Lisp_Object tem;
124 while (tem = Fsyntax_table_p (obj),
265a9e55 125 NILP (tem))
b3cfe0c8 126 obj = wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
127 return obj;
128}
129
130
131DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
132 "Return the current syntax table.\n\
133This is the one specified by the current buffer.")
134 ()
135{
136 return current_buffer->syntax_table;
137}
138
139DEFUN ("standard-syntax-table", Fstandard_syntax_table,
140 Sstandard_syntax_table, 0, 0, 0,
141 "Return the standard syntax table.\n\
142This is the one used for new buffers.")
143 ()
144{
145 return Vstandard_syntax_table;
146}
147
148DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
149 "Construct a new syntax table and return it.\n\
150It is a copy of the TABLE, which defaults to the standard syntax table.")
151 (table)
152 Lisp_Object table;
153{
154 Lisp_Object size, val;
1e142fb7
KH
155 XSETFASTINT (size, 0400);
156 XSETFASTINT (val, 0);
8489eb67 157 val = Fmake_vector (size, val);
265a9e55 158 if (!NILP (table))
8489eb67 159 table = check_syntax_table (table);
265a9e55 160 else if (NILP (Vstandard_syntax_table))
8489eb67
RS
161 /* Can only be null during initialization */
162 return val;
163 else table = Vstandard_syntax_table;
164
165 bcopy (XVECTOR (table)->contents,
166 XVECTOR (val)->contents, 0400 * sizeof (Lisp_Object));
167 return val;
168}
169
170DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
171 "Select a new syntax table for the current buffer.\n\
172One argument, a syntax table.")
173 (table)
174 Lisp_Object table;
175{
176 table = check_syntax_table (table);
177 current_buffer->syntax_table = table;
178 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
179 current_buffer->local_var_flags
180 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
181 return table;
182}
183\f
184/* Convert a letter which signifies a syntax code
185 into the code it signifies.
186 This is used by modify-syntax-entry, and other things. */
187
188unsigned char syntax_spec_code[0400] =
189 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
193 (char) Swhitespace, 0377, (char) Sstring, 0377,
194 (char) Smath, 0377, 0377, (char) Squote,
195 (char) Sopen, (char) Sclose, 0377, 0377,
196 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
197 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
198 0377, 0377, 0377, 0377,
199 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 200 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
203 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
207 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
208 };
209
210/* Indexed by syntax code, give the letter that describes it. */
211
6cb71bf6 212char syntax_code_spec[14] =
8489eb67 213 {
6cb71bf6 214 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
8489eb67
RS
215 };
216\f
217DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
218 "Return the syntax code of CHAR, described by a character.\n\
219For example, if CHAR is a word constituent, the character `?w' is returned.\n\
220The characters that correspond to various syntax codes\n\
221are listed in the documentation of `modify-syntax-entry'.")
222 (ch)
223 Lisp_Object ch;
224{
225 CHECK_NUMBER (ch, 0);
beefa22e
RS
226 return make_number (syntax_code_spec[(int) SYNTAX (XINT (ch))]);
227}
228
229DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
230 "Return the matching parenthesis of CHAR, or nil if none.")
231 (ch)
232 Lisp_Object ch;
233{
234 int code;
235 CHECK_NUMBER (ch, 0);
236 code = SYNTAX (XINT (ch));
a8bd7cd8 237 if (code == Sopen || code == Sclose)
beefa22e
RS
238 return make_number (SYNTAX_MATCH (XINT (ch)));
239 return Qnil;
8489eb67
RS
240}
241
242/* This comment supplies the doc string for modify-syntax-entry,
243 for make-docfile to see. We cannot put this in the real DEFUN
244 due to limits in the Unix cpp.
245
2fcea188 246DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
247 "Set syntax for character CHAR according to string S.\n\
248The syntax is changed only for table TABLE, which defaults to\n\
249 the current buffer's syntax table.\n\
250The first character of S should be one of the following:\n\
32676c08
JB
251 Space or - whitespace syntax. w word constituent.\n\
252 _ symbol constituent. . punctuation.\n\
253 ( open-parenthesis. ) close-parenthesis.\n\
254 \" string quote. \\ escape.\n\
255 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 256 < comment starter. > comment ender.\n\
6cb71bf6 257 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 258\n\
8489eb67
RS
259Only single-character comment start and end sequences are represented thus.\n\
260Two-character sequences are represented as described below.\n\
261The second character of S is the matching parenthesis,\n\
262 used only if the first character is `(' or `)'.\n\
263Any additional characters are flags.\n\
e5d4f4dc 264Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
8489eb67
RS
265 1 means C is the start of a two-char comment start sequence.\n\
266 2 means C is the second character of such a sequence.\n\
267 3 means C is the start of a two-char comment end sequence.\n\
268 4 means C is the second character of such a sequence.\n\
e5d4f4dc
RS
269\n\
270There can be up to two orthogonal comment sequences. This is to support\n\
271language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
272a, but you can set the comment sequence style to b (on the second character\n\
273of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc
RS
274this flag:\n\
275 b means C is part of comment sequence b.\n\
276\n\
277 p means C is a prefix character for `backward-prefix-chars';\n\
278 such characters are treated as whitespace when they occur\n\
8489eb67 279 between expressions.")
2fcea188 280 (char, s, table)
8489eb67
RS
281*/
282
283DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
284 /* I really don't know why this is interactive
285 help-form should at least be made useful whilst reading the second arg
286 */
287 "cSet syntax for character: \nsSet syntax for %s to: ",
288 0 /* See immediately above */)
289 (c, newentry, syntax_table)
290 Lisp_Object c, newentry, syntax_table;
291{
292 register unsigned char *p, match;
293 register enum syntaxcode code;
247e20a8 294 int val;
8489eb67
RS
295
296 CHECK_NUMBER (c, 0);
297 CHECK_STRING (newentry, 1);
265a9e55 298 if (NILP (syntax_table))
8489eb67
RS
299 syntax_table = current_buffer->syntax_table;
300 else
301 syntax_table = check_syntax_table (syntax_table);
302
303 p = XSTRING (newentry)->data;
304 code = (enum syntaxcode) syntax_spec_code[*p++];
305 if (((int) code & 0377) == 0377)
306 error ("invalid syntax description letter: %c", c);
307
308 match = *p;
309 if (match) p++;
310 if (match == ' ') match = 0;
311
247e20a8 312 val = (match << 8) + (int) code;
8489eb67
RS
313 while (*p)
314 switch (*p++)
315 {
316 case '1':
247e20a8 317 val |= 1 << 16;
8489eb67
RS
318 break;
319
320 case '2':
247e20a8 321 val |= 1 << 17;
8489eb67
RS
322 break;
323
324 case '3':
247e20a8 325 val |= 1 << 18;
8489eb67
RS
326 break;
327
328 case '4':
247e20a8 329 val |= 1 << 19;
8489eb67
RS
330 break;
331
332 case 'p':
247e20a8 333 val |= 1 << 20;
8489eb67 334 break;
e5d4f4dc
RS
335
336 case 'b':
247e20a8 337 val |= 1 << 21;
e5d4f4dc 338 break;
8489eb67
RS
339 }
340
247e20a8 341 XSETFASTINT (XVECTOR (syntax_table)->contents[0xFF & XINT (c)], val);
8489eb67
RS
342
343 return Qnil;
344}
345\f
346/* Dump syntax table to buffer in human-readable format */
347
340f92b5 348static void
8489eb67
RS
349describe_syntax (value)
350 Lisp_Object value;
351{
352 register enum syntaxcode code;
e5d4f4dc 353 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67
RS
354 char str[2];
355
356 Findent_to (make_number (16), make_number (1));
357
330cf232 358 if (!INTEGERP (value))
8489eb67
RS
359 {
360 insert_string ("invalid");
361 return;
362 }
363
364 code = (enum syntaxcode) (XINT (value) & 0377);
365 match = (XINT (value) >> 8) & 0377;
366 start1 = (XINT (value) >> 16) & 1;
367 start2 = (XINT (value) >> 17) & 1;
368 end1 = (XINT (value) >> 18) & 1;
369 end2 = (XINT (value) >> 19) & 1;
370 prefix = (XINT (value) >> 20) & 1;
e5d4f4dc 371 comstyle = (XINT (value) >> 21) & 1;
8489eb67
RS
372
373 if ((int) code < 0 || (int) code >= (int) Smax)
374 {
375 insert_string ("invalid");
376 return;
377 }
378 desc = syntax_code_spec[(int) code];
379
380 str[0] = desc, str[1] = 0;
381 insert (str, 1);
382
383 str[0] = match ? match : ' ';
384 insert (str, 1);
385
386
387 if (start1)
388 insert ("1", 1);
389 if (start2)
390 insert ("2", 1);
391
392 if (end1)
393 insert ("3", 1);
394 if (end2)
395 insert ("4", 1);
396
397 if (prefix)
398 insert ("p", 1);
e5d4f4dc
RS
399 if (comstyle)
400 insert ("b", 1);
8489eb67
RS
401
402 insert_string ("\twhich means: ");
403
404#ifdef SWITCH_ENUM_BUG
405 switch ((int) code)
406#else
407 switch (code)
408#endif
409 {
410 case Swhitespace:
411 insert_string ("whitespace"); break;
412 case Spunct:
413 insert_string ("punctuation"); break;
414 case Sword:
415 insert_string ("word"); break;
416 case Ssymbol:
417 insert_string ("symbol"); break;
418 case Sopen:
419 insert_string ("open"); break;
420 case Sclose:
421 insert_string ("close"); break;
422 case Squote:
423 insert_string ("quote"); break;
424 case Sstring:
425 insert_string ("string"); break;
426 case Smath:
427 insert_string ("math"); break;
428 case Sescape:
429 insert_string ("escape"); break;
430 case Scharquote:
431 insert_string ("charquote"); break;
432 case Scomment:
433 insert_string ("comment"); break;
434 case Sendcomment:
435 insert_string ("endcomment"); break;
6cb71bf6
RS
436 case Sinherit:
437 insert_string ("inherit"); break;
8489eb67
RS
438 default:
439 insert_string ("invalid");
440 return;
441 }
442
443 if (match)
444 {
445 insert_string (", matches ");
410e16ff 446 insert_char (match);
8489eb67
RS
447 }
448
449 if (start1)
450 insert_string (",\n\t is the first character of a comment-start sequence");
451 if (start2)
452 insert_string (",\n\t is the second character of a comment-start sequence");
453
454 if (end1)
455 insert_string (",\n\t is the first character of a comment-end sequence");
456 if (end2)
457 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
458 if (comstyle)
459 insert_string (" (comment style b)");
460
8489eb67
RS
461 if (prefix)
462 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
463
464 insert_string ("\n");
465}
466
340f92b5 467static Lisp_Object
8489eb67
RS
468describe_syntax_1 (vector)
469 Lisp_Object vector;
470{
471 struct buffer *old = current_buffer;
472 set_buffer_internal (XBUFFER (Vstandard_output));
b3cfe0c8 473 describe_vector (vector, Qnil, describe_syntax, 0, Qnil);
36cd82fe 474 call0 (intern ("help-mode"));
8489eb67
RS
475 set_buffer_internal (old);
476 return Qnil;
477}
478
479DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
480 "Describe the syntax specifications in the syntax table.\n\
481The descriptions are inserted in a buffer, which is then displayed.")
482 ()
483{
484 internal_with_output_to_temp_buffer
485 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
486
487 return Qnil;
488}
489\f
490/* Return the position across COUNT words from FROM.
491 If that many words cannot be found before the end of the buffer, return 0.
492 COUNT negative means scan backward and stop at word beginning. */
493
494scan_words (from, count)
495 register int from, count;
496{
497 register int beg = BEGV;
498 register int end = ZV;
499 register int code;
500
501 immediate_quit = 1;
502 QUIT;
503
504 while (count > 0)
505 {
506 while (1)
507 {
508 if (from == end)
509 {
510 immediate_quit = 0;
511 return 0;
512 }
513 code = SYNTAX (FETCH_CHAR (from));
514 if (words_include_escapes
515 && (code == Sescape || code == Scharquote))
516 break;
517 if (code == Sword)
518 break;
519 from++;
520 }
521 while (1)
522 {
523 if (from == end) break;
524 code = SYNTAX (FETCH_CHAR (from));
525 if (!(words_include_escapes
526 && (code == Sescape || code == Scharquote)))
527 if (code != Sword)
528 break;
529 from++;
530 }
531 count--;
532 }
533 while (count < 0)
534 {
535 while (1)
536 {
537 if (from == beg)
538 {
539 immediate_quit = 0;
540 return 0;
541 }
542 code = SYNTAX (FETCH_CHAR (from - 1));
543 if (words_include_escapes
544 && (code == Sescape || code == Scharquote))
545 break;
546 if (code == Sword)
547 break;
548 from--;
549 }
550 while (1)
551 {
552 if (from == beg) break;
553 code = SYNTAX (FETCH_CHAR (from - 1));
554 if (!(words_include_escapes
555 && (code == Sescape || code == Scharquote)))
556 if (code != Sword)
557 break;
558 from--;
559 }
560 count++;
561 }
562
563 immediate_quit = 0;
564
565 return from;
566}
567
568DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
569 "Move point forward ARG words (backward if ARG is negative).\n\
570Normally returns t.\n\
571If an edge of the buffer is reached, point is left there\n\
572and nil is returned.")
573 (count)
574 Lisp_Object count;
575{
576 int val;
577 CHECK_NUMBER (count, 0);
578
579 if (!(val = scan_words (point, XINT (count))))
580 {
581 SET_PT (XINT (count) > 0 ? ZV : BEGV);
582 return Qnil;
583 }
584 SET_PT (val);
585 return Qt;
586}
587\f
b3cfe0c8
RS
588DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
589 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 590Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 591Set point to where scanning stops.\n\
b3cfe0c8
RS
592If N comments are found as expected, with nothing except whitespace\n\
593between them, return t; otherwise return nil.")
594 (count)
840f481c 595 Lisp_Object count;
b3cfe0c8
RS
596{
597 register int from;
598 register int stop;
599 register int c;
600 register enum syntaxcode code;
601 int comstyle = 0; /* style of comment encountered */
be720845 602 int found;
840f481c
RS
603 int count1;
604
605 CHECK_NUMBER (count, 0);
606 count1 = XINT (count);
b3cfe0c8
RS
607
608 immediate_quit = 1;
609 QUIT;
610
611 from = PT;
612
840f481c 613 while (count1 > 0)
b3cfe0c8
RS
614 {
615 stop = ZV;
04882296 616 do
b3cfe0c8 617 {
04882296
KH
618 if (from == stop)
619 {
620 SET_PT (from);
621 return Qnil;
622 }
b3cfe0c8
RS
623 c = FETCH_CHAR (from);
624 code = SYNTAX (c);
625 from++;
626 comstyle = 0;
627 if (from < stop && SYNTAX_COMSTART_FIRST (c)
628 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
629 {
7fc8191e
RS
630 /* We have encountered a comment start sequence and we
631 are ignoring all text inside comments. We must record
b3cfe0c8
RS
632 the comment style this sequence begins so that later,
633 only a comment end of the same style actually ends
7fc8191e 634 the comment section. */
b3cfe0c8
RS
635 code = Scomment;
636 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
637 from++;
638 }
04882296
KH
639 }
640 while (code == Swhitespace || code == Sendcomment);
641 if (code != Scomment)
642 {
643 immediate_quit = 0;
644 SET_PT (from - 1);
645 return Qnil;
646 }
647 /* We're at the start of a comment. */
648 while (1)
649 {
650 if (from == stop)
b3cfe0c8
RS
651 {
652 immediate_quit = 0;
04882296 653 SET_PT (from);
b3cfe0c8
RS
654 return Qnil;
655 }
04882296 656 c = FETCH_CHAR (from);
2eb1d6a2 657 from++;
04882296
KH
658 if (SYNTAX (c) == Sendcomment
659 && SYNTAX_COMMENT_STYLE (c) == comstyle)
660 /* we have encountered a comment end of the same style
661 as the comment sequence which began this comment
662 section */
663 break;
04882296
KH
664 if (from < stop && SYNTAX_COMEND_FIRST (c)
665 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
666 && SYNTAX_COMMENT_STYLE (c) == comstyle)
667 /* we have encountered a comment end of the same style
668 as the comment sequence which began this comment
669 section */
670 { from++; break; }
b3cfe0c8 671 }
04882296 672 /* We have skipped one comment. */
840f481c 673 count1--;
b3cfe0c8
RS
674 }
675
840f481c 676 while (count1 < 0)
b3cfe0c8
RS
677 {
678 stop = BEGV;
679 while (from > stop)
680 {
681 int quoted;
682
683 from--;
684 quoted = char_quoted (from);
685 if (quoted)
686 from--;
687 c = FETCH_CHAR (from);
688 code = SYNTAX (c);
689 comstyle = 0;
7fc8191e
RS
690 if (code == Sendcomment)
691 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8
RS
692 if (from > stop && SYNTAX_COMEND_SECOND (c)
693 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
694 && !char_quoted (from - 1))
695 {
7fc8191e 696 /* We must record the comment style encountered so that
b3cfe0c8 697 later, we can match only the proper comment begin
7fc8191e 698 sequence of the same style. */
b3cfe0c8
RS
699 code = Sendcomment;
700 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
701 from--;
702 }
703
704 if (code == Sendcomment && !quoted)
705 {
8f9dc2ed 706#if 0
b3cfe0c8
RS
707 if (code != SYNTAX (c))
708 /* For a two-char comment ender, we can assume
709 it does end a comment. So scan back in a simple way. */
710 {
711 if (from != stop) from--;
712 while (1)
713 {
714 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
715 && SYNTAX_COMMENT_STYLE (c) == comstyle)
716 break;
717 if (from == stop)
718 {
719 immediate_quit = 0;
c81a3712 720 SET_PT (from);
b3cfe0c8
RS
721 return Qnil;
722 }
723 from--;
724 if (SYNTAX_COMSTART_SECOND (c)
725 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
726 && SYNTAX_COMMENT_STYLE (c) == comstyle
727 && !char_quoted (from))
728 break;
729 }
730 break;
731 }
8f9dc2ed 732#endif /* 0 */
b3cfe0c8
RS
733
734 /* Look back, counting the parity of string-quotes,
735 and recording the comment-starters seen.
736 When we reach a safe place, assume that's not in a string;
737 then step the main scan to the earliest comment-starter seen
738 an even number of string quotes away from the safe place.
739
740 OFROM[I] is position of the earliest comment-starter seen
741 which is I+2X quotes from the comment-end.
742 PARITY is current parity of quotes from the comment end. */
743 {
744 int parity = 0;
745 char my_stringend = 0;
746 int string_lossage = 0;
747 int comment_end = from;
748 int comstart_pos = 0;
749 int comstart_parity = 0;
4841063f 750 int scanstart = from - 1;
b3cfe0c8
RS
751
752 /* At beginning of range to scan, we're outside of strings;
753 that determines quote parity to the comment-end. */
754 while (from != stop)
755 {
756 /* Move back and examine a character. */
757 from--;
758
759 c = FETCH_CHAR (from);
760 code = SYNTAX (c);
761
762 /* If this char is the second of a 2-char comment sequence,
763 back up and give the pair the appropriate syntax. */
764 if (from > stop && SYNTAX_COMEND_SECOND (c)
765 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
766 {
767 code = Sendcomment;
768 from--;
4841063f 769 c = FETCH_CHAR (from);
b3cfe0c8
RS
770 }
771
4841063f
RS
772 /* If this char starts a 2-char comment start sequence,
773 treat it like a 1-char comment starter. */
774 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
775 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
776 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
777 code = Scomment;
b3cfe0c8
RS
778
779 /* Ignore escaped characters. */
780 if (char_quoted (from))
781 continue;
782
783 /* Track parity of quotes. */
784 if (code == Sstring)
785 {
786 parity ^= 1;
787 if (my_stringend == 0)
788 my_stringend = c;
789 /* If we have two kinds of string delimiters.
790 There's no way to grok this scanning backwards. */
791 else if (my_stringend != c)
792 string_lossage = 1;
793 }
794
795 /* Record comment-starters according to that
796 quote-parity to the comment-end. */
797 if (code == Scomment)
798 {
799 comstart_parity = parity;
800 comstart_pos = from;
801 }
802
803 /* If we find another earlier comment-ender,
eb8c3be9 804 any comment-starts earlier than that don't count
b3cfe0c8
RS
805 (because they go with the earlier comment-ender). */
806 if (code == Sendcomment
807 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
808 break;
809
810 /* Assume a defun-start point is outside of strings. */
811 if (code == Sopen
812 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
813 break;
814 }
815
816 if (comstart_pos == 0)
817 from = comment_end;
818 /* If the earliest comment starter
819 is followed by uniform paired string quotes or none,
820 we know it can't be inside a string
821 since if it were then the comment ender would be inside one.
822 So it does start a comment. Skip back to it. */
823 else if (comstart_parity == 0 && !string_lossage)
824 from = comstart_pos;
825 else
826 {
827 /* We had two kinds of string delimiters mixed up
828 together. Decode this going forwards.
829 Scan fwd from the previous comment ender
830 to the one in question; this records where we
831 last passed a comment starter. */
832 struct lisp_parse_state state;
833 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 834 comment_end - 1, -10000, 0, Qnil, 0);
b3cfe0c8
RS
835 if (state.incomment)
836 from = state.comstart;
837 else
838 /* We can't grok this as a comment; scan it normally. */
839 from = comment_end;
840 }
841 }
7fc8191e
RS
842 /* We have skipped one comment. */
843 break;
b3cfe0c8 844 }
c81a3712 845 else if ((code != Swhitespace && code != Scomment) || quoted)
b3cfe0c8
RS
846 {
847 immediate_quit = 0;
c81a3712 848 SET_PT (from + 1);
b3cfe0c8
RS
849 return Qnil;
850 }
851 }
852
840f481c 853 count1++;
b3cfe0c8
RS
854 }
855
856 SET_PT (from);
857 immediate_quit = 0;
858 return Qt;
859}
860\f
8489eb67
RS
861int parse_sexp_ignore_comments;
862
863Lisp_Object
864scan_lists (from, count, depth, sexpflag)
865 register int from;
866 int count, depth, sexpflag;
867{
868 Lisp_Object val;
869 register int stop;
870 register int c;
871 char stringterm;
872 int quoted;
873 int mathexit = 0;
874 register enum syntaxcode code;
875 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 876 int comstyle = 0; /* style of comment encountered */
8489eb67
RS
877
878 if (depth > 0) min_depth = 0;
879
880 immediate_quit = 1;
881 QUIT;
882
883 while (count > 0)
884 {
885 stop = ZV;
886 while (from < stop)
887 {
888 c = FETCH_CHAR (from);
e5d4f4dc 889 code = SYNTAX (c);
8489eb67
RS
890 from++;
891 if (from < stop && SYNTAX_COMSTART_FIRST (c)
892 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
893 && parse_sexp_ignore_comments)
e5d4f4dc
RS
894 {
895 /* we have encountered a comment start sequence and we
896 are ignoring all text inside comments. we must record
897 the comment style this sequence begins so that later,
898 only a comment end of the same style actually ends
899 the comment section */
900 code = Scomment;
901 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
902 from++;
903 }
904
8489eb67
RS
905 if (SYNTAX_PREFIX (c))
906 continue;
907
908#ifdef SWITCH_ENUM_BUG
909 switch ((int) code)
910#else
911 switch (code)
912#endif
913 {
914 case Sescape:
915 case Scharquote:
916 if (from == stop) goto lose;
917 from++;
918 /* treat following character as a word constituent */
919 case Sword:
920 case Ssymbol:
921 if (depth || !sexpflag) break;
922 /* This word counts as a sexp; return at end of it. */
923 while (from < stop)
924 {
925#ifdef SWITCH_ENUM_BUG
e5d4f4dc 926 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 927#else
e5d4f4dc 928 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
929#endif
930 {
931 case Scharquote:
932 case Sescape:
933 from++;
934 if (from == stop) goto lose;
935 break;
936 case Sword:
937 case Ssymbol:
938 case Squote:
939 break;
940 default:
941 goto done;
942 }
943 from++;
944 }
945 goto done;
946
947 case Scomment:
948 if (!parse_sexp_ignore_comments) break;
949 while (1)
950 {
c01f7ec0
RS
951 if (from == stop)
952 {
953 if (depth == 0)
954 goto done;
955 goto lose;
956 }
e5d4f4dc
RS
957 c = FETCH_CHAR (from);
958 if (SYNTAX (c) == Sendcomment
959 && SYNTAX_COMMENT_STYLE (c) == comstyle)
960 /* we have encountered a comment end of the same style
961 as the comment sequence which began this comment
962 section */
8489eb67
RS
963 break;
964 from++;
965 if (from < stop && SYNTAX_COMEND_FIRST (c)
e5d4f4dc
RS
966 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
967 && SYNTAX_COMMENT_STYLE (c) == comstyle)
968 /* we have encountered a comment end of the same style
969 as the comment sequence which began this comment
970 section */
8489eb67
RS
971 { from++; break; }
972 }
973 break;
974
975 case Smath:
976 if (!sexpflag)
977 break;
978 if (from != stop && c == FETCH_CHAR (from))
979 from++;
980 if (mathexit)
981 {
982 mathexit = 0;
983 goto close1;
984 }
985 mathexit = 1;
986
987 case Sopen:
988 if (!++depth) goto done;
989 break;
990
991 case Sclose:
992 close1:
993 if (!--depth) goto done;
994 if (depth < min_depth)
995 error ("Containing expression ends prematurely");
996 break;
997
998 case Sstring:
999 stringterm = FETCH_CHAR (from - 1);
1000 while (1)
1001 {
1002 if (from >= stop) goto lose;
1003 if (FETCH_CHAR (from) == stringterm) break;
1004#ifdef SWITCH_ENUM_BUG
e5d4f4dc 1005 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 1006#else
e5d4f4dc 1007 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
1008#endif
1009 {
1010 case Scharquote:
1011 case Sescape:
1012 from++;
1013 }
1014 from++;
1015 }
1016 from++;
1017 if (!depth && sexpflag) goto done;
1018 break;
1019 }
1020 }
1021
1022 /* Reached end of buffer. Error if within object, return nil if between */
1023 if (depth) goto lose;
1024
1025 immediate_quit = 0;
1026 return Qnil;
1027
1028 /* End of object reached */
1029 done:
1030 count--;
1031 }
1032
1033 while (count < 0)
1034 {
1035 stop = BEGV;
1036 while (from > stop)
1037 {
1038 from--;
1039 if (quoted = char_quoted (from))
1040 from--;
1041 c = FETCH_CHAR (from);
1042 code = SYNTAX (c);
7fc8191e
RS
1043 comstyle = 0;
1044 if (code == Sendcomment)
1045 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67
RS
1046 if (from > stop && SYNTAX_COMEND_SECOND (c)
1047 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1048 && !char_quoted (from - 1)
1049 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1050 {
1051 /* we must record the comment style encountered so that
1052 later, we can match only the proper comment begin
1053 sequence of the same style */
1054 code = Sendcomment;
1055 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1056 from--;
1057 }
1058
8489eb67
RS
1059 if (SYNTAX_PREFIX (c))
1060 continue;
1061
1062#ifdef SWITCH_ENUM_BUG
1063 switch ((int) (quoted ? Sword : code))
1064#else
1065 switch (quoted ? Sword : code)
1066#endif
1067 {
1068 case Sword:
1069 case Ssymbol:
1070 if (depth || !sexpflag) break;
1071 /* This word counts as a sexp; count object finished after passing it. */
1072 while (from > stop)
1073 {
1074 quoted = char_quoted (from - 1);
1075 if (quoted)
1076 from--;
e5d4f4dc
RS
1077 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1078 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1079 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
8489eb67
RS
1080 goto done2;
1081 from--;
1082 }
1083 goto done2;
1084
1085 case Smath:
1086 if (!sexpflag)
1087 break;
1088 if (from != stop && c == FETCH_CHAR (from - 1))
1089 from--;
1090 if (mathexit)
1091 {
1092 mathexit = 0;
1093 goto open2;
1094 }
1095 mathexit = 1;
1096
1097 case Sclose:
1098 if (!++depth) goto done2;
1099 break;
1100
1101 case Sopen:
1102 open2:
1103 if (!--depth) goto done2;
1104 if (depth < min_depth)
1105 error ("Containing expression ends prematurely");
1106 break;
1107
1108 case Sendcomment:
1109 if (!parse_sexp_ignore_comments)
1110 break;
8f9dc2ed 1111#if 0
37bef230
RS
1112 if (code != SYNTAX (c))
1113 /* For a two-char comment ender, we can assume
1114 it does end a comment. So scan back in a simple way. */
1115 {
1116 if (from != stop) from--;
1117 while (1)
1118 {
1119 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1120 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1121 break;
c01f7ec0
RS
1122 if (from == stop)
1123 {
1124 if (depth == 0)
1125 goto done2;
1126 goto lose;
1127 }
37bef230
RS
1128 from--;
1129 if (SYNTAX_COMSTART_SECOND (c)
1130 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1131 && SYNTAX_COMMENT_STYLE (c) == comstyle
1132 && !char_quoted (from))
1133 break;
1134 }
1135 break;
1136 }
8f9dc2ed 1137#endif /* 0 */
37bef230 1138
8489eb67
RS
1139 /* Look back, counting the parity of string-quotes,
1140 and recording the comment-starters seen.
1141 When we reach a safe place, assume that's not in a string;
1142 then step the main scan to the earliest comment-starter seen
1143 an even number of string quotes away from the safe place.
1144
1145 OFROM[I] is position of the earliest comment-starter seen
1146 which is I+2X quotes from the comment-end.
1147 PARITY is current parity of quotes from the comment end. */
1148 {
8489eb67 1149 int parity = 0;
e5d4f4dc
RS
1150 char my_stringend = 0;
1151 int string_lossage = 0;
1152 int comment_end = from;
37bef230
RS
1153 int comstart_pos = 0;
1154 int comstart_parity = 0;
4841063f 1155 int scanstart = from - 1;
8489eb67
RS
1156
1157 /* At beginning of range to scan, we're outside of strings;
1158 that determines quote parity to the comment-end. */
1159 while (from != stop)
1160 {
1161 /* Move back and examine a character. */
1162 from--;
1163
1164 c = FETCH_CHAR (from);
1165 code = SYNTAX (c);
1166
1167 /* If this char is the second of a 2-char comment sequence,
1168 back up and give the pair the appropriate syntax. */
1169 if (from > stop && SYNTAX_COMEND_SECOND (c)
1170 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
e5d4f4dc
RS
1171 {
1172 code = Sendcomment;
1173 from--;
4841063f 1174 c = FETCH_CHAR (from);
e5d4f4dc
RS
1175 }
1176
4841063f
RS
1177 /* If this char starts a 2-char comment start sequence,
1178 treat it like a 1-char comment starter. */
1179 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1180 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
1181 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
1182 code = Scomment;
8489eb67
RS
1183
1184 /* Ignore escaped characters. */
1185 if (char_quoted (from))
1186 continue;
1187
37bef230 1188 /* Track parity of quotes. */
8489eb67 1189 if (code == Sstring)
e5d4f4dc
RS
1190 {
1191 parity ^= 1;
1192 if (my_stringend == 0)
1193 my_stringend = c;
37bef230 1194 /* If we have two kinds of string delimiters.
e5d4f4dc
RS
1195 There's no way to grok this scanning backwards. */
1196 else if (my_stringend != c)
1197 string_lossage = 1;
1198 }
8489eb67
RS
1199
1200 /* Record comment-starters according to that
1201 quote-parity to the comment-end. */
1202 if (code == Scomment)
37bef230
RS
1203 {
1204 comstart_parity = parity;
1205 comstart_pos = from;
1206 }
8489eb67 1207
37bef230 1208 /* If we find another earlier comment-ender,
eb8c3be9 1209 any comment-starts earlier than that don't count
37bef230 1210 (because they go with the earlier comment-ender). */
e5d4f4dc
RS
1211 if (code == Sendcomment
1212 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
8489eb67 1213 break;
37bef230
RS
1214
1215 /* Assume a defun-start point is outside of strings. */
1216 if (code == Sopen
1217 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1218 break;
8489eb67 1219 }
37bef230
RS
1220
1221 if (comstart_pos == 0)
1222 from = comment_end;
1223 /* If the earliest comment starter
1224 is followed by uniform paired string quotes or none,
1225 we know it can't be inside a string
1226 since if it were then the comment ender would be inside one.
1227 So it does start a comment. Skip back to it. */
1228 else if (comstart_parity == 0 && !string_lossage)
1229 from = comstart_pos;
1230 else
e5d4f4dc
RS
1231 {
1232 /* We had two kinds of string delimiters mixed up
1233 together. Decode this going forwards.
1234 Scan fwd from the previous comment ender
1235 to the one in question; this records where we
1236 last passed a comment starter. */
1237 struct lisp_parse_state state;
37bef230 1238 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 1239 comment_end - 1, -10000, 0, Qnil, 0);
e5d4f4dc
RS
1240 if (state.incomment)
1241 from = state.comstart;
1242 else
1243 /* We can't grok this as a comment; scan it normally. */
1244 from = comment_end;
1245 }
8489eb67
RS
1246 }
1247 break;
1248
1249 case Sstring:
1250 stringterm = FETCH_CHAR (from);
1251 while (1)
1252 {
1253 if (from == stop) goto lose;
1254 if (!char_quoted (from - 1)
1255 && stringterm == FETCH_CHAR (from - 1))
1256 break;
1257 from--;
1258 }
1259 from--;
1260 if (!depth && sexpflag) goto done2;
1261 break;
1262 }
1263 }
1264
1265 /* Reached start of buffer. Error if within object, return nil if between */
1266 if (depth) goto lose;
1267
1268 immediate_quit = 0;
1269 return Qnil;
1270
1271 done2:
1272 count++;
1273 }
1274
1275
1276 immediate_quit = 0;
1e142fb7 1277 XSETFASTINT (val, from);
8489eb67
RS
1278 return val;
1279
1280 lose:
1281 error ("Unbalanced parentheses");
1282 /* NOTREACHED */
1283}
1284
340f92b5 1285static int
8489eb67
RS
1286char_quoted (pos)
1287 register int pos;
1288{
1289 register enum syntaxcode code;
1290 register int beg = BEGV;
1291 register int quoted = 0;
1292
1293 while (pos > beg
1294 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1295 || code == Sescape))
1296 pos--, quoted = !quoted;
1297 return quoted;
1298}
1299
1300DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1301 "Scan from character number FROM by COUNT lists.\n\
1302Returns the character number of the position thus found.\n\
1303\n\
1304If DEPTH is nonzero, paren depth begins counting from that value,\n\
1305only places where the depth in parentheses becomes zero\n\
1306are candidates for stopping; COUNT such places are counted.\n\
1307Thus, a positive value for DEPTH means go out levels.\n\
1308\n\
1309Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1310\n\
1311If the beginning or end of (the accessible part of) the buffer is reached\n\
1312and the depth is wrong, an error is signaled.\n\
1313If the depth is right but the count is not used up, nil is returned.")
1314 (from, count, depth)
1315 Lisp_Object from, count, depth;
1316{
1317 CHECK_NUMBER (from, 0);
1318 CHECK_NUMBER (count, 1);
1319 CHECK_NUMBER (depth, 2);
1320
1321 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1322}
1323
1324DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1325 "Scan from character number FROM by COUNT balanced expressions.\n\
1326If COUNT is negative, scan backwards.\n\
1327Returns the character number of the position thus found.\n\
1328\n\
1329Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1330\n\
1331If the beginning or end of (the accessible part of) the buffer is reached\n\
1332in the middle of a parenthetical grouping, an error is signaled.\n\
1333If the beginning or end is reached between groupings\n\
1334but before count is used up, nil is returned.")
1335 (from, count)
1336 Lisp_Object from, count;
1337{
1338 CHECK_NUMBER (from, 0);
1339 CHECK_NUMBER (count, 1);
1340
1341 return scan_lists (XINT (from), XINT (count), 0, 1);
1342}
1343
1344DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1345 0, 0, 0,
1346 "Move point backward over any number of chars with prefix syntax.\n\
1347This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1348 ()
1349{
1350 int beg = BEGV;
1351 int pos = point;
1352
1353 while (pos > beg && !char_quoted (pos - 1)
1354 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1355 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1356 pos--;
1357
1358 SET_PT (pos);
1359
1360 return Qnil;
1361}
1362\f
8489eb67 1363/* Parse forward from FROM to END,
e5d4f4dc
RS
1364 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1365 and return a description of the state of the parse at END.
c81a3712
RS
1366 If STOPBEFORE is nonzero, stop at the start of an atom.
1367 If COMMENTSTOP is nonzero, stop at the start of a comment. */
8489eb67 1368
340f92b5 1369static void
c81a3712
RS
1370scan_sexps_forward (stateptr, from, end, targetdepth,
1371 stopbefore, oldstate, commentstop)
e5d4f4dc 1372 struct lisp_parse_state *stateptr;
8489eb67
RS
1373 register int from;
1374 int end, targetdepth, stopbefore;
1375 Lisp_Object oldstate;
c81a3712 1376 int commentstop;
8489eb67
RS
1377{
1378 struct lisp_parse_state state;
1379
1380 register enum syntaxcode code;
1381 struct level { int last, prev; };
1382 struct level levelstart[100];
1383 register struct level *curlevel = levelstart;
1384 struct level *endlevel = levelstart + 100;
1385 char prev;
1386 register int depth; /* Paren depth of current scanning location.
1387 level - levelstart equals this except
1388 when the depth becomes negative. */
1389 int mindepth; /* Lowest DEPTH value seen. */
1390 int start_quoted = 0; /* Nonzero means starting after a char quote */
1391 Lisp_Object tem;
1392
1393 immediate_quit = 1;
1394 QUIT;
1395
265a9e55 1396 if (NILP (oldstate))
8489eb67
RS
1397 {
1398 depth = 0;
1399 state.instring = -1;
1400 state.incomment = 0;
e5d4f4dc 1401 state.comstyle = 0; /* comment style a by default */
8489eb67
RS
1402 }
1403 else
1404 {
1405 tem = Fcar (oldstate);
265a9e55 1406 if (!NILP (tem))
8489eb67
RS
1407 depth = XINT (tem);
1408 else
1409 depth = 0;
1410
1411 oldstate = Fcdr (oldstate);
1412 oldstate = Fcdr (oldstate);
1413 oldstate = Fcdr (oldstate);
1414 tem = Fcar (oldstate);
265a9e55 1415 state.instring = !NILP (tem) ? XINT (tem) : -1;
8489eb67
RS
1416
1417 oldstate = Fcdr (oldstate);
1418 tem = Fcar (oldstate);
265a9e55 1419 state.incomment = !NILP (tem);
8489eb67
RS
1420
1421 oldstate = Fcdr (oldstate);
1422 tem = Fcar (oldstate);
265a9e55 1423 start_quoted = !NILP (tem);
e5d4f4dc
RS
1424
1425 /* if the eight element of the list is nil, we are in comment
1426 style a. if it is non-nil, we are in comment style b */
1427 oldstate = Fcdr (oldstate);
1428 oldstate = Fcdr (oldstate);
e5d4f4dc
RS
1429 tem = Fcar (oldstate);
1430 state.comstyle = !NILP (tem);
8489eb67
RS
1431 }
1432 state.quoted = 0;
1433 mindepth = depth;
1434
1435 curlevel->prev = -1;
1436 curlevel->last = -1;
1437
1438 /* Enter the loop at a place appropriate for initial state. */
1439
1440 if (state.incomment) goto startincomment;
1441 if (state.instring >= 0)
1442 {
1443 if (start_quoted) goto startquotedinstring;
1444 goto startinstring;
1445 }
1446 if (start_quoted) goto startquoted;
1447
1448 while (from < end)
1449 {
e5d4f4dc 1450 code = SYNTAX (FETCH_CHAR (from));
8489eb67 1451 from++;
8f9dc2ed
RS
1452 if (code == Scomment)
1453 state.comstart = from-1;
1454
1455 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1456 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
e5d4f4dc
RS
1457 {
1458 /* Record the comment style we have entered so that only
1459 the comment-end sequence of the same style actually
1460 terminates the comment section. */
1461 code = Scomment;
1462 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
8f9dc2ed 1463 state.comstart = from-1;
e5d4f4dc
RS
1464 from++;
1465 }
1466
8489eb67
RS
1467 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1468 continue;
1469#ifdef SWITCH_ENUM_BUG
1470 switch ((int) code)
1471#else
1472 switch (code)
1473#endif
1474 {
1475 case Sescape:
1476 case Scharquote:
1477 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1478 curlevel->last = from - 1;
1479 startquoted:
1480 if (from == end) goto endquoted;
1481 from++;
1482 goto symstarted;
1483 /* treat following character as a word constituent */
1484 case Sword:
1485 case Ssymbol:
1486 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1487 curlevel->last = from - 1;
1488 symstarted:
1489 while (from < end)
1490 {
1491#ifdef SWITCH_ENUM_BUG
e5d4f4dc 1492 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 1493#else
e5d4f4dc 1494 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
1495#endif
1496 {
1497 case Scharquote:
1498 case Sescape:
1499 from++;
1500 if (from == end) goto endquoted;
1501 break;
1502 case Sword:
1503 case Ssymbol:
1504 case Squote:
1505 break;
1506 default:
1507 goto symdone;
1508 }
1509 from++;
1510 }
1511 symdone:
1512 curlevel->prev = curlevel->last;
1513 break;
1514
5a28e48c
RS
1515 startincomment:
1516 if (commentstop)
1517 goto done;
1518 if (from != BEGV)
1519 {
1520 /* Enter the loop in the middle so that we find
1521 a 2-char comment ender if we start in the middle of it. */
1522 prev = FETCH_CHAR (from - 1);
1523 goto startincomment_1;
1524 }
1525 /* At beginning of buffer, enter the loop the ordinary way. */
1526
8489eb67
RS
1527 case Scomment:
1528 state.incomment = 1;
c81a3712
RS
1529 if (commentstop)
1530 goto done;
8489eb67
RS
1531 while (1)
1532 {
1533 if (from == end) goto done;
e5d4f4dc
RS
1534 prev = FETCH_CHAR (from);
1535 if (SYNTAX (prev) == Sendcomment
1536 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1537 /* Only terminate the comment section if the endcomment
1538 of the same style as the start sequence has been
1539 encountered. */
8489eb67
RS
1540 break;
1541 from++;
5a28e48c 1542 startincomment_1:
8489eb67 1543 if (from < end && SYNTAX_COMEND_FIRST (prev)
e5d4f4dc
RS
1544 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1545 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1546 /* Only terminate the comment section if the end-comment
1547 sequence of the same style as the start sequence has
1548 been encountered. */
8489eb67
RS
1549 { from++; break; }
1550 }
1551 state.incomment = 0;
e5d4f4dc 1552 state.comstyle = 0; /* reset the comment style */
8489eb67
RS
1553 break;
1554
1555 case Sopen:
1556 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1557 depth++;
1558 /* curlevel++->last ran into compiler bug on Apollo */
1559 curlevel->last = from - 1;
1560 if (++curlevel == endlevel)
1561 error ("Nesting too deep for parser");
1562 curlevel->prev = -1;
1563 curlevel->last = -1;
1564 if (!--targetdepth) goto done;
1565 break;
1566
1567 case Sclose:
1568 depth--;
1569 if (depth < mindepth)
1570 mindepth = depth;
1571 if (curlevel != levelstart)
1572 curlevel--;
1573 curlevel->prev = curlevel->last;
1574 if (!++targetdepth) goto done;
1575 break;
1576
1577 case Sstring:
1578 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1579 curlevel->last = from - 1;
1580 state.instring = FETCH_CHAR (from - 1);
1581 startinstring:
1582 while (1)
1583 {
1584 if (from >= end) goto done;
1585 if (FETCH_CHAR (from) == state.instring) break;
1586#ifdef SWITCH_ENUM_BUG
e5d4f4dc 1587 switch ((int) SYNTAX (FETCH_CHAR (from)))
8489eb67 1588#else
e5d4f4dc 1589 switch (SYNTAX (FETCH_CHAR (from)))
8489eb67
RS
1590#endif
1591 {
1592 case Scharquote:
1593 case Sescape:
1594 from++;
1595 startquotedinstring:
1596 if (from >= end) goto endquoted;
1597 }
1598 from++;
1599 }
1600 state.instring = -1;
1601 curlevel->prev = curlevel->last;
1602 from++;
1603 break;
1604
1605 case Smath:
1606 break;
1607 }
1608 }
1609 goto done;
1610
1611 stop: /* Here if stopping before start of sexp. */
1612 from--; /* We have just fetched the char that starts it; */
1613 goto done; /* but return the position before it. */
1614
1615 endquoted:
1616 state.quoted = 1;
1617 done:
1618 state.depth = depth;
1619 state.mindepth = mindepth;
1620 state.thislevelstart = curlevel->prev;
1621 state.prevlevelstart
1622 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1623 state.location = from;
1624 immediate_quit = 0;
1625
e5d4f4dc 1626 *stateptr = state;
8489eb67
RS
1627}
1628
1629/* This comment supplies the doc string for parse-partial-sexp,
1630 for make-docfile to see. We cannot put this in the real DEFUN
1631 due to limits in the Unix cpp.
1632
c81a3712 1633DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
1634 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1635Parsing stops at TO or when certain criteria are met;\n\
1636 point is set to where parsing stops.\n\
1637If fifth arg STATE is omitted or nil,\n\
1638 parsing assumes that FROM is the beginning of a function.\n\
e5d4f4dc 1639Value is a list of eight elements describing final state of parsing:\n\
af50f9e5
RS
1640 0. depth in parens.\n\
1641 1. character address of start of innermost containing list; nil if none.\n\
1642 2. character address of start of last complete sexp terminated.\n\
1643 3. non-nil if inside a string.\n\
8489eb67 1644 (it is the character that will terminate the string.)\n\
af50f9e5
RS
1645 4. t if inside a comment.\n\
1646 5. t if following a quote character.\n\
1647 6. the minimum paren-depth encountered during this scan.\n\
1648 7. t if in a comment of style `b'.\n\
8489eb67
RS
1649If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1650in parentheses becomes equal to TARGETDEPTH.\n\
1651Fourth arg STOPBEFORE non-nil means stop when come to\n\
1652 any character that starts a sexp.\n\
ec11639d 1653Fifth arg STATE is an eight-list like what this function returns.\n\
a4275ad1 1654It is used to initialize the state of the parse. Its second and third
c81a3712
RS
1655elements are ignored.
1656Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1657 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
1658*/
1659
c81a3712 1660DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 1661 0 /* See immediately above */)
c81a3712
RS
1662 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1663 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
1664{
1665 struct lisp_parse_state state;
1666 int target;
1667
265a9e55 1668 if (!NILP (targetdepth))
8489eb67
RS
1669 {
1670 CHECK_NUMBER (targetdepth, 3);
1671 target = XINT (targetdepth);
1672 }
1673 else
1674 target = -100000; /* We won't reach this depth */
1675
1676 validate_region (&from, &to);
e5d4f4dc 1677 scan_sexps_forward (&state, XINT (from), XINT (to),
c81a3712
RS
1678 target, !NILP (stopbefore), oldstate,
1679 !NILP (commentstop));
8489eb67
RS
1680
1681 SET_PT (state.location);
1682
1683 return Fcons (make_number (state.depth),
1684 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1685 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1686 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1687 Fcons (state.incomment ? Qt : Qnil,
1688 Fcons (state.quoted ? Qt : Qnil,
e5d4f4dc
RS
1689 Fcons (make_number (state.mindepth),
1690 Fcons (state.comstyle ? Qt : Qnil,
1691 Qnil))))))));
8489eb67
RS
1692}
1693\f
1694init_syntax_once ()
1695{
1696 register int i;
1697 register struct Lisp_Vector *v;
1698
1699 /* Set this now, so first buffer creation can refer to it. */
1700 /* Make it nil before calling copy-syntax-table
1701 so that copy-syntax-table will know not to try to copy from garbage */
1702 Vstandard_syntax_table = Qnil;
1703 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1704
1705 v = XVECTOR (Vstandard_syntax_table);
1706
1707 for (i = 'a'; i <= 'z'; i++)
1e142fb7 1708 XSETFASTINT (v->contents[i], (int) Sword);
8489eb67 1709 for (i = 'A'; i <= 'Z'; i++)
1e142fb7 1710 XSETFASTINT (v->contents[i], (int) Sword);
8489eb67 1711 for (i = '0'; i <= '9'; i++)
1e142fb7
KH
1712 XSETFASTINT (v->contents[i], (int) Sword);
1713 XSETFASTINT (v->contents['$'], (int) Sword);
1714 XSETFASTINT (v->contents['%'], (int) Sword);
1715
1716 XSETFASTINT (v->contents['('], (int) Sopen + (')' << 8));
1717 XSETFASTINT (v->contents[')'], (int) Sclose + ('(' << 8));
1718 XSETFASTINT (v->contents['['], (int) Sopen + (']' << 8));
1719 XSETFASTINT (v->contents[']'], (int) Sclose + ('[' << 8));
1720 XSETFASTINT (v->contents['{'], (int) Sopen + ('}' << 8));
1721 XSETFASTINT (v->contents['}'], (int) Sclose + ('{' << 8));
1722 XSETFASTINT (v->contents['"'], (int) Sstring);
1723 XSETFASTINT (v->contents['\\'], (int) Sescape);
8489eb67
RS
1724
1725 for (i = 0; i < 10; i++)
1e142fb7 1726 XSETFASTINT (v->contents["_-+*/&|<>="[i]], (int) Ssymbol);
8489eb67
RS
1727
1728 for (i = 0; i < 12; i++)
1e142fb7 1729 XSETFASTINT (v->contents[".,;:?!#@~^'`"[i]], (int) Spunct);
8489eb67
RS
1730}
1731
1732syms_of_syntax ()
1733{
1734 Qsyntax_table_p = intern ("syntax-table-p");
1735 staticpro (&Qsyntax_table_p);
1736
1737 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1738 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1739
1740 words_include_escapes = 0;
1741 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1742 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1743
1744 defsubr (&Ssyntax_table_p);
1745 defsubr (&Ssyntax_table);
1746 defsubr (&Sstandard_syntax_table);
1747 defsubr (&Scopy_syntax_table);
1748 defsubr (&Sset_syntax_table);
1749 defsubr (&Schar_syntax);
beefa22e 1750 defsubr (&Smatching_paren);
8489eb67
RS
1751 defsubr (&Smodify_syntax_entry);
1752 defsubr (&Sdescribe_syntax);
1753
1754 defsubr (&Sforward_word);
1755
b3cfe0c8 1756 defsubr (&Sforward_comment);
8489eb67
RS
1757 defsubr (&Sscan_lists);
1758 defsubr (&Sscan_sexps);
1759 defsubr (&Sbackward_prefix_chars);
1760 defsubr (&Sparse_partial_sexp);
1761}