(xbufobjfwd, xbuflocal, xwinconfig):
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
0220c518 2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
18160b98 21#include <config.h>
8489eb67
RS
22#include <ctype.h>
23#include "lisp.h"
24#include "commands.h"
25#include "buffer.h"
26#include "syntax.h"
27
28Lisp_Object Qsyntax_table_p;
29
340f92b5
RS
30static void scan_sexps_forward ();
31static int char_quoted ();
32
8489eb67
RS
33int words_include_escapes;
34
e5d4f4dc
RS
35/* This is the internal form of the parse state used in parse-partial-sexp. */
36
37struct lisp_parse_state
38 {
39 int depth; /* Depth at end of parsing */
40 int instring; /* -1 if not within string, else desired terminator. */
41 int incomment; /* Nonzero if within a comment at end of parsing */
42 int comstyle; /* comment style a=0, or b=1 */
43 int quoted; /* Nonzero if just after an escape char at end of parsing */
44 int thislevelstart; /* Char number of most recent start-of-expression at current level */
45 int prevlevelstart; /* Char number of start of containing expression */
46 int location; /* Char number at which parsing stopped. */
47 int mindepth; /* Minimum depth seen while scanning. */
48 int comstart; /* Position just after last comment starter. */
49 };
50\f
37bef230
RS
51/* These variables are a cache for finding the start of a defun.
52 find_start_pos is the place for which the defun start was found.
53 find_start_value is the defun start position found for it.
54 find_start_buffer is the buffer it was found in.
55 find_start_begv is the BEGV value when it was found.
56 find_start_modiff is the value of MODIFF when it was found. */
57
58static int find_start_pos;
59static int find_start_value;
60static struct buffer *find_start_buffer;
61static int find_start_begv;
62static int find_start_modiff;
63
64/* Find a defun-start that is the last one before POS (or nearly the last).
65 We record what we find, so that another call in the same area
66 can return the same value right away. */
67
68static int
69find_defun_start (pos)
70 int pos;
71{
72 int tem;
73 int shortage;
74
75 /* Use previous finding, if it's valid and applies to this inquiry. */
76 if (current_buffer == find_start_buffer
77 /* Reuse the defun-start even if POS is a little farther on.
78 POS might be in the next defun, but that's ok.
79 Our value may not be the best possible, but will still be usable. */
80 && pos <= find_start_pos + 1000
81 && pos >= find_start_value
82 && BEGV == find_start_begv
83 && MODIFF == find_start_modiff)
84 return find_start_value;
85
86 /* Back up to start of line. */
be5357e2 87 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
37bef230
RS
88
89 while (tem > BEGV)
90 {
91 /* Open-paren at start of line means we found our defun-start. */
92 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
93 break;
94 /* Move to beg of previous line. */
be5357e2 95 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
37bef230
RS
96 }
97
98 /* Record what we found, for the next try. */
99 find_start_value = tem;
100 find_start_buffer = current_buffer;
101 find_start_modiff = MODIFF;
102 find_start_begv = BEGV;
103 find_start_pos = pos;
104
105 return find_start_value;
106}
107\f
8489eb67
RS
108DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
109 "Return t if ARG is a syntax table.\n\
110Any vector of 256 elements will do.")
111 (obj)
112 Lisp_Object obj;
113{
330cf232 114 if (VECTORP (obj) && XVECTOR (obj)->size == 0400)
8489eb67
RS
115 return Qt;
116 return Qnil;
117}
118
119Lisp_Object
120check_syntax_table (obj)
121 Lisp_Object obj;
122{
123 register Lisp_Object tem;
124 while (tem = Fsyntax_table_p (obj),
265a9e55 125 NILP (tem))
b3cfe0c8 126 obj = wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
127 return obj;
128}
129
130
131DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
132 "Return the current syntax table.\n\
133This is the one specified by the current buffer.")
134 ()
135{
136 return current_buffer->syntax_table;
137}
138
139DEFUN ("standard-syntax-table", Fstandard_syntax_table,
140 Sstandard_syntax_table, 0, 0, 0,
141 "Return the standard syntax table.\n\
142This is the one used for new buffers.")
143 ()
144{
145 return Vstandard_syntax_table;
146}
147
148DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
149 "Construct a new syntax table and return it.\n\
150It is a copy of the TABLE, which defaults to the standard syntax table.")
151 (table)
152 Lisp_Object table;
153{
154 Lisp_Object size, val;
1e142fb7
KH
155 XSETFASTINT (size, 0400);
156 XSETFASTINT (val, 0);
8489eb67 157 val = Fmake_vector (size, val);
265a9e55 158 if (!NILP (table))
8489eb67 159 table = check_syntax_table (table);
265a9e55 160 else if (NILP (Vstandard_syntax_table))
8489eb67
RS
161 /* Can only be null during initialization */
162 return val;
163 else table = Vstandard_syntax_table;
164
165 bcopy (XVECTOR (table)->contents,
166 XVECTOR (val)->contents, 0400 * sizeof (Lisp_Object));
167 return val;
168}
169
170DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
171 "Select a new syntax table for the current buffer.\n\
172One argument, a syntax table.")
173 (table)
174 Lisp_Object table;
175{
176 table = check_syntax_table (table);
177 current_buffer->syntax_table = table;
178 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
179 current_buffer->local_var_flags
180 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
181 return table;
182}
183\f
184/* Convert a letter which signifies a syntax code
185 into the code it signifies.
186 This is used by modify-syntax-entry, and other things. */
187
188unsigned char syntax_spec_code[0400] =
189 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
193 (char) Swhitespace, 0377, (char) Sstring, 0377,
194 (char) Smath, 0377, 0377, (char) Squote,
195 (char) Sopen, (char) Sclose, 0377, 0377,
196 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
197 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
198 0377, 0377, 0377, 0377,
199 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 200 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
203 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
207 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
208 };
209
210/* Indexed by syntax code, give the letter that describes it. */
211
6cb71bf6 212char syntax_code_spec[14] =
8489eb67 213 {
6cb71bf6 214 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
8489eb67
RS
215 };
216\f
217DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
218 "Return the syntax code of CHAR, described by a character.\n\
219For example, if CHAR is a word constituent, the character `?w' is returned.\n\
220The characters that correspond to various syntax codes\n\
221are listed in the documentation of `modify-syntax-entry'.")
222 (ch)
223 Lisp_Object ch;
224{
225 CHECK_NUMBER (ch, 0);
beefa22e
RS
226 return make_number (syntax_code_spec[(int) SYNTAX (XINT (ch))]);
227}
228
229DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
230 "Return the matching parenthesis of CHAR, or nil if none.")
231 (ch)
232 Lisp_Object ch;
233{
234 int code;
235 CHECK_NUMBER (ch, 0);
236 code = SYNTAX (XINT (ch));
a8bd7cd8 237 if (code == Sopen || code == Sclose)
beefa22e
RS
238 return make_number (SYNTAX_MATCH (XINT (ch)));
239 return Qnil;
8489eb67
RS
240}
241
242/* This comment supplies the doc string for modify-syntax-entry,
243 for make-docfile to see. We cannot put this in the real DEFUN
244 due to limits in the Unix cpp.
245
2fcea188 246DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
247 "Set syntax for character CHAR according to string S.\n\
248The syntax is changed only for table TABLE, which defaults to\n\
249 the current buffer's syntax table.\n\
250The first character of S should be one of the following:\n\
32676c08
JB
251 Space or - whitespace syntax. w word constituent.\n\
252 _ symbol constituent. . punctuation.\n\
253 ( open-parenthesis. ) close-parenthesis.\n\
254 \" string quote. \\ escape.\n\
255 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 256 < comment starter. > comment ender.\n\
6cb71bf6 257 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 258\n\
8489eb67
RS
259Only single-character comment start and end sequences are represented thus.\n\
260Two-character sequences are represented as described below.\n\
261The second character of S is the matching parenthesis,\n\
262 used only if the first character is `(' or `)'.\n\
263Any additional characters are flags.\n\
e5d4f4dc 264Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
8489eb67
RS
265 1 means C is the start of a two-char comment start sequence.\n\
266 2 means C is the second character of such a sequence.\n\
267 3 means C is the start of a two-char comment end sequence.\n\
268 4 means C is the second character of such a sequence.\n\
e5d4f4dc
RS
269\n\
270There can be up to two orthogonal comment sequences. This is to support\n\
271language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
272a, but you can set the comment sequence style to b (on the second character\n\
273of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc
RS
274this flag:\n\
275 b means C is part of comment sequence b.\n\
276\n\
277 p means C is a prefix character for `backward-prefix-chars';\n\
278 such characters are treated as whitespace when they occur\n\
8489eb67 279 between expressions.")
2fcea188 280 (char, s, table)
8489eb67
RS
281*/
282
283DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
284 /* I really don't know why this is interactive
285 help-form should at least be made useful whilst reading the second arg
286 */
287 "cSet syntax for character: \nsSet syntax for %s to: ",
288 0 /* See immediately above */)
289 (c, newentry, syntax_table)
290 Lisp_Object c, newentry, syntax_table;
291{
292 register unsigned char *p, match;
293 register enum syntaxcode code;
247e20a8 294 int val;
8489eb67
RS
295
296 CHECK_NUMBER (c, 0);
297 CHECK_STRING (newentry, 1);
265a9e55 298 if (NILP (syntax_table))
8489eb67
RS
299 syntax_table = current_buffer->syntax_table;
300 else
301 syntax_table = check_syntax_table (syntax_table);
302
303 p = XSTRING (newentry)->data;
304 code = (enum syntaxcode) syntax_spec_code[*p++];
305 if (((int) code & 0377) == 0377)
306 error ("invalid syntax description letter: %c", c);
307
308 match = *p;
309 if (match) p++;
310 if (match == ' ') match = 0;
311
247e20a8 312 val = (match << 8) + (int) code;
8489eb67
RS
313 while (*p)
314 switch (*p++)
315 {
316 case '1':
247e20a8 317 val |= 1 << 16;
8489eb67
RS
318 break;
319
320 case '2':
247e20a8 321 val |= 1 << 17;
8489eb67
RS
322 break;
323
324 case '3':
247e20a8 325 val |= 1 << 18;
8489eb67
RS
326 break;
327
328 case '4':
247e20a8 329 val |= 1 << 19;
8489eb67
RS
330 break;
331
332 case 'p':
247e20a8 333 val |= 1 << 20;
8489eb67 334 break;
e5d4f4dc
RS
335
336 case 'b':
247e20a8 337 val |= 1 << 21;
e5d4f4dc 338 break;
8489eb67
RS
339 }
340
247e20a8 341 XSETFASTINT (XVECTOR (syntax_table)->contents[0xFF & XINT (c)], val);
8489eb67
RS
342
343 return Qnil;
344}
345\f
346/* Dump syntax table to buffer in human-readable format */
347
340f92b5 348static void
8489eb67
RS
349describe_syntax (value)
350 Lisp_Object value;
351{
352 register enum syntaxcode code;
e5d4f4dc 353 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67
RS
354 char str[2];
355
356 Findent_to (make_number (16), make_number (1));
357
330cf232 358 if (!INTEGERP (value))
8489eb67
RS
359 {
360 insert_string ("invalid");
361 return;
362 }
363
364 code = (enum syntaxcode) (XINT (value) & 0377);
365 match = (XINT (value) >> 8) & 0377;
366 start1 = (XINT (value) >> 16) & 1;
367 start2 = (XINT (value) >> 17) & 1;
368 end1 = (XINT (value) >> 18) & 1;
369 end2 = (XINT (value) >> 19) & 1;
370 prefix = (XINT (value) >> 20) & 1;
e5d4f4dc 371 comstyle = (XINT (value) >> 21) & 1;
8489eb67
RS
372
373 if ((int) code < 0 || (int) code >= (int) Smax)
374 {
375 insert_string ("invalid");
376 return;
377 }
378 desc = syntax_code_spec[(int) code];
379
380 str[0] = desc, str[1] = 0;
381 insert (str, 1);
382
383 str[0] = match ? match : ' ';
384 insert (str, 1);
385
386
387 if (start1)
388 insert ("1", 1);
389 if (start2)
390 insert ("2", 1);
391
392 if (end1)
393 insert ("3", 1);
394 if (end2)
395 insert ("4", 1);
396
397 if (prefix)
398 insert ("p", 1);
e5d4f4dc
RS
399 if (comstyle)
400 insert ("b", 1);
8489eb67
RS
401
402 insert_string ("\twhich means: ");
403
0220c518 404 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
405 {
406 case Swhitespace:
407 insert_string ("whitespace"); break;
408 case Spunct:
409 insert_string ("punctuation"); break;
410 case Sword:
411 insert_string ("word"); break;
412 case Ssymbol:
413 insert_string ("symbol"); break;
414 case Sopen:
415 insert_string ("open"); break;
416 case Sclose:
417 insert_string ("close"); break;
418 case Squote:
419 insert_string ("quote"); break;
420 case Sstring:
421 insert_string ("string"); break;
422 case Smath:
423 insert_string ("math"); break;
424 case Sescape:
425 insert_string ("escape"); break;
426 case Scharquote:
427 insert_string ("charquote"); break;
428 case Scomment:
429 insert_string ("comment"); break;
430 case Sendcomment:
431 insert_string ("endcomment"); break;
6cb71bf6
RS
432 case Sinherit:
433 insert_string ("inherit"); break;
8489eb67
RS
434 default:
435 insert_string ("invalid");
436 return;
437 }
438
439 if (match)
440 {
441 insert_string (", matches ");
410e16ff 442 insert_char (match);
8489eb67
RS
443 }
444
445 if (start1)
446 insert_string (",\n\t is the first character of a comment-start sequence");
447 if (start2)
448 insert_string (",\n\t is the second character of a comment-start sequence");
449
450 if (end1)
451 insert_string (",\n\t is the first character of a comment-end sequence");
452 if (end2)
453 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
454 if (comstyle)
455 insert_string (" (comment style b)");
456
8489eb67
RS
457 if (prefix)
458 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
459
460 insert_string ("\n");
461}
462
340f92b5 463static Lisp_Object
8489eb67
RS
464describe_syntax_1 (vector)
465 Lisp_Object vector;
466{
467 struct buffer *old = current_buffer;
468 set_buffer_internal (XBUFFER (Vstandard_output));
b3cfe0c8 469 describe_vector (vector, Qnil, describe_syntax, 0, Qnil);
36cd82fe 470 call0 (intern ("help-mode"));
8489eb67
RS
471 set_buffer_internal (old);
472 return Qnil;
473}
474
475DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
476 "Describe the syntax specifications in the syntax table.\n\
477The descriptions are inserted in a buffer, which is then displayed.")
478 ()
479{
480 internal_with_output_to_temp_buffer
481 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
482
483 return Qnil;
484}
485\f
486/* Return the position across COUNT words from FROM.
487 If that many words cannot be found before the end of the buffer, return 0.
488 COUNT negative means scan backward and stop at word beginning. */
489
490scan_words (from, count)
491 register int from, count;
492{
493 register int beg = BEGV;
494 register int end = ZV;
495 register int code;
496
497 immediate_quit = 1;
498 QUIT;
499
500 while (count > 0)
501 {
502 while (1)
503 {
504 if (from == end)
505 {
506 immediate_quit = 0;
507 return 0;
508 }
509 code = SYNTAX (FETCH_CHAR (from));
510 if (words_include_escapes
511 && (code == Sescape || code == Scharquote))
512 break;
513 if (code == Sword)
514 break;
515 from++;
516 }
517 while (1)
518 {
519 if (from == end) break;
520 code = SYNTAX (FETCH_CHAR (from));
521 if (!(words_include_escapes
522 && (code == Sescape || code == Scharquote)))
523 if (code != Sword)
524 break;
525 from++;
526 }
527 count--;
528 }
529 while (count < 0)
530 {
531 while (1)
532 {
533 if (from == beg)
534 {
535 immediate_quit = 0;
536 return 0;
537 }
538 code = SYNTAX (FETCH_CHAR (from - 1));
539 if (words_include_escapes
540 && (code == Sescape || code == Scharquote))
541 break;
542 if (code == Sword)
543 break;
544 from--;
545 }
546 while (1)
547 {
548 if (from == beg) break;
549 code = SYNTAX (FETCH_CHAR (from - 1));
550 if (!(words_include_escapes
551 && (code == Sescape || code == Scharquote)))
552 if (code != Sword)
553 break;
554 from--;
555 }
556 count++;
557 }
558
559 immediate_quit = 0;
560
561 return from;
562}
563
564DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
565 "Move point forward ARG words (backward if ARG is negative).\n\
566Normally returns t.\n\
567If an edge of the buffer is reached, point is left there\n\
568and nil is returned.")
569 (count)
570 Lisp_Object count;
571{
572 int val;
573 CHECK_NUMBER (count, 0);
574
575 if (!(val = scan_words (point, XINT (count))))
576 {
577 SET_PT (XINT (count) > 0 ? ZV : BEGV);
578 return Qnil;
579 }
580 SET_PT (val);
581 return Qt;
582}
583\f
b3cfe0c8
RS
584DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
585 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 586Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 587Set point to where scanning stops.\n\
b3cfe0c8
RS
588If N comments are found as expected, with nothing except whitespace\n\
589between them, return t; otherwise return nil.")
590 (count)
840f481c 591 Lisp_Object count;
b3cfe0c8
RS
592{
593 register int from;
594 register int stop;
595 register int c;
596 register enum syntaxcode code;
597 int comstyle = 0; /* style of comment encountered */
be720845 598 int found;
840f481c
RS
599 int count1;
600
601 CHECK_NUMBER (count, 0);
602 count1 = XINT (count);
b3cfe0c8
RS
603
604 immediate_quit = 1;
605 QUIT;
606
607 from = PT;
608
840f481c 609 while (count1 > 0)
b3cfe0c8
RS
610 {
611 stop = ZV;
04882296 612 do
b3cfe0c8 613 {
04882296
KH
614 if (from == stop)
615 {
616 SET_PT (from);
617 return Qnil;
618 }
b3cfe0c8
RS
619 c = FETCH_CHAR (from);
620 code = SYNTAX (c);
621 from++;
622 comstyle = 0;
623 if (from < stop && SYNTAX_COMSTART_FIRST (c)
624 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
625 {
7fc8191e
RS
626 /* We have encountered a comment start sequence and we
627 are ignoring all text inside comments. We must record
b3cfe0c8
RS
628 the comment style this sequence begins so that later,
629 only a comment end of the same style actually ends
7fc8191e 630 the comment section. */
b3cfe0c8
RS
631 code = Scomment;
632 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
633 from++;
634 }
04882296
KH
635 }
636 while (code == Swhitespace || code == Sendcomment);
637 if (code != Scomment)
638 {
639 immediate_quit = 0;
640 SET_PT (from - 1);
641 return Qnil;
642 }
643 /* We're at the start of a comment. */
644 while (1)
645 {
646 if (from == stop)
b3cfe0c8
RS
647 {
648 immediate_quit = 0;
04882296 649 SET_PT (from);
b3cfe0c8
RS
650 return Qnil;
651 }
04882296 652 c = FETCH_CHAR (from);
2eb1d6a2 653 from++;
04882296
KH
654 if (SYNTAX (c) == Sendcomment
655 && SYNTAX_COMMENT_STYLE (c) == comstyle)
656 /* we have encountered a comment end of the same style
657 as the comment sequence which began this comment
658 section */
659 break;
04882296
KH
660 if (from < stop && SYNTAX_COMEND_FIRST (c)
661 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
662 && SYNTAX_COMMENT_STYLE (c) == comstyle)
663 /* we have encountered a comment end of the same style
664 as the comment sequence which began this comment
665 section */
666 { from++; break; }
b3cfe0c8 667 }
04882296 668 /* We have skipped one comment. */
840f481c 669 count1--;
b3cfe0c8
RS
670 }
671
840f481c 672 while (count1 < 0)
b3cfe0c8
RS
673 {
674 stop = BEGV;
675 while (from > stop)
676 {
677 int quoted;
678
679 from--;
680 quoted = char_quoted (from);
681 if (quoted)
682 from--;
683 c = FETCH_CHAR (from);
684 code = SYNTAX (c);
685 comstyle = 0;
7fc8191e
RS
686 if (code == Sendcomment)
687 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8
RS
688 if (from > stop && SYNTAX_COMEND_SECOND (c)
689 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
690 && !char_quoted (from - 1))
691 {
7fc8191e 692 /* We must record the comment style encountered so that
b3cfe0c8 693 later, we can match only the proper comment begin
7fc8191e 694 sequence of the same style. */
b3cfe0c8
RS
695 code = Sendcomment;
696 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
697 from--;
698 }
699
700 if (code == Sendcomment && !quoted)
701 {
8f9dc2ed 702#if 0
b3cfe0c8
RS
703 if (code != SYNTAX (c))
704 /* For a two-char comment ender, we can assume
705 it does end a comment. So scan back in a simple way. */
706 {
707 if (from != stop) from--;
708 while (1)
709 {
710 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
711 && SYNTAX_COMMENT_STYLE (c) == comstyle)
712 break;
713 if (from == stop)
714 {
715 immediate_quit = 0;
c81a3712 716 SET_PT (from);
b3cfe0c8
RS
717 return Qnil;
718 }
719 from--;
720 if (SYNTAX_COMSTART_SECOND (c)
721 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
722 && SYNTAX_COMMENT_STYLE (c) == comstyle
723 && !char_quoted (from))
724 break;
725 }
726 break;
727 }
8f9dc2ed 728#endif /* 0 */
b3cfe0c8
RS
729
730 /* Look back, counting the parity of string-quotes,
731 and recording the comment-starters seen.
732 When we reach a safe place, assume that's not in a string;
733 then step the main scan to the earliest comment-starter seen
734 an even number of string quotes away from the safe place.
735
736 OFROM[I] is position of the earliest comment-starter seen
737 which is I+2X quotes from the comment-end.
738 PARITY is current parity of quotes from the comment end. */
739 {
740 int parity = 0;
741 char my_stringend = 0;
742 int string_lossage = 0;
743 int comment_end = from;
744 int comstart_pos = 0;
745 int comstart_parity = 0;
4841063f 746 int scanstart = from - 1;
b3cfe0c8
RS
747
748 /* At beginning of range to scan, we're outside of strings;
749 that determines quote parity to the comment-end. */
750 while (from != stop)
751 {
752 /* Move back and examine a character. */
753 from--;
754
755 c = FETCH_CHAR (from);
756 code = SYNTAX (c);
757
758 /* If this char is the second of a 2-char comment sequence,
759 back up and give the pair the appropriate syntax. */
760 if (from > stop && SYNTAX_COMEND_SECOND (c)
761 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
762 {
763 code = Sendcomment;
764 from--;
4841063f 765 c = FETCH_CHAR (from);
b3cfe0c8
RS
766 }
767
4841063f
RS
768 /* If this char starts a 2-char comment start sequence,
769 treat it like a 1-char comment starter. */
770 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
771 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
772 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
773 code = Scomment;
b3cfe0c8
RS
774
775 /* Ignore escaped characters. */
776 if (char_quoted (from))
777 continue;
778
779 /* Track parity of quotes. */
780 if (code == Sstring)
781 {
782 parity ^= 1;
783 if (my_stringend == 0)
784 my_stringend = c;
785 /* If we have two kinds of string delimiters.
786 There's no way to grok this scanning backwards. */
787 else if (my_stringend != c)
788 string_lossage = 1;
789 }
790
791 /* Record comment-starters according to that
792 quote-parity to the comment-end. */
793 if (code == Scomment)
794 {
795 comstart_parity = parity;
796 comstart_pos = from;
797 }
798
799 /* If we find another earlier comment-ender,
eb8c3be9 800 any comment-starts earlier than that don't count
b3cfe0c8
RS
801 (because they go with the earlier comment-ender). */
802 if (code == Sendcomment
803 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
804 break;
805
806 /* Assume a defun-start point is outside of strings. */
807 if (code == Sopen
808 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
809 break;
810 }
811
812 if (comstart_pos == 0)
813 from = comment_end;
814 /* If the earliest comment starter
815 is followed by uniform paired string quotes or none,
816 we know it can't be inside a string
817 since if it were then the comment ender would be inside one.
818 So it does start a comment. Skip back to it. */
819 else if (comstart_parity == 0 && !string_lossage)
820 from = comstart_pos;
821 else
822 {
823 /* We had two kinds of string delimiters mixed up
824 together. Decode this going forwards.
825 Scan fwd from the previous comment ender
826 to the one in question; this records where we
827 last passed a comment starter. */
828 struct lisp_parse_state state;
829 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 830 comment_end - 1, -10000, 0, Qnil, 0);
b3cfe0c8
RS
831 if (state.incomment)
832 from = state.comstart;
833 else
834 /* We can't grok this as a comment; scan it normally. */
835 from = comment_end;
836 }
837 }
7fc8191e
RS
838 /* We have skipped one comment. */
839 break;
b3cfe0c8 840 }
c81a3712 841 else if ((code != Swhitespace && code != Scomment) || quoted)
b3cfe0c8
RS
842 {
843 immediate_quit = 0;
c81a3712 844 SET_PT (from + 1);
b3cfe0c8
RS
845 return Qnil;
846 }
847 }
848
840f481c 849 count1++;
b3cfe0c8
RS
850 }
851
852 SET_PT (from);
853 immediate_quit = 0;
854 return Qt;
855}
856\f
8489eb67
RS
857int parse_sexp_ignore_comments;
858
859Lisp_Object
860scan_lists (from, count, depth, sexpflag)
861 register int from;
862 int count, depth, sexpflag;
863{
864 Lisp_Object val;
865 register int stop;
866 register int c;
867 char stringterm;
868 int quoted;
869 int mathexit = 0;
870 register enum syntaxcode code;
871 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 872 int comstyle = 0; /* style of comment encountered */
8489eb67
RS
873
874 if (depth > 0) min_depth = 0;
875
876 immediate_quit = 1;
877 QUIT;
878
879 while (count > 0)
880 {
881 stop = ZV;
882 while (from < stop)
883 {
884 c = FETCH_CHAR (from);
e5d4f4dc 885 code = SYNTAX (c);
8489eb67
RS
886 from++;
887 if (from < stop && SYNTAX_COMSTART_FIRST (c)
888 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
889 && parse_sexp_ignore_comments)
e5d4f4dc
RS
890 {
891 /* we have encountered a comment start sequence and we
892 are ignoring all text inside comments. we must record
893 the comment style this sequence begins so that later,
894 only a comment end of the same style actually ends
895 the comment section */
896 code = Scomment;
897 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
898 from++;
899 }
900
8489eb67
RS
901 if (SYNTAX_PREFIX (c))
902 continue;
903
0220c518 904 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
905 {
906 case Sescape:
907 case Scharquote:
908 if (from == stop) goto lose;
909 from++;
910 /* treat following character as a word constituent */
911 case Sword:
912 case Ssymbol:
913 if (depth || !sexpflag) break;
914 /* This word counts as a sexp; return at end of it. */
915 while (from < stop)
916 {
0220c518 917 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
918 {
919 case Scharquote:
920 case Sescape:
921 from++;
922 if (from == stop) goto lose;
923 break;
924 case Sword:
925 case Ssymbol:
926 case Squote:
927 break;
928 default:
929 goto done;
930 }
931 from++;
932 }
933 goto done;
934
935 case Scomment:
936 if (!parse_sexp_ignore_comments) break;
937 while (1)
938 {
c01f7ec0
RS
939 if (from == stop)
940 {
941 if (depth == 0)
942 goto done;
943 goto lose;
944 }
e5d4f4dc
RS
945 c = FETCH_CHAR (from);
946 if (SYNTAX (c) == Sendcomment
947 && SYNTAX_COMMENT_STYLE (c) == comstyle)
948 /* we have encountered a comment end of the same style
949 as the comment sequence which began this comment
950 section */
8489eb67
RS
951 break;
952 from++;
953 if (from < stop && SYNTAX_COMEND_FIRST (c)
e5d4f4dc
RS
954 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
955 && SYNTAX_COMMENT_STYLE (c) == comstyle)
956 /* we have encountered a comment end of the same style
957 as the comment sequence which began this comment
958 section */
8489eb67
RS
959 { from++; break; }
960 }
961 break;
962
963 case Smath:
964 if (!sexpflag)
965 break;
966 if (from != stop && c == FETCH_CHAR (from))
967 from++;
968 if (mathexit)
969 {
970 mathexit = 0;
971 goto close1;
972 }
973 mathexit = 1;
974
975 case Sopen:
976 if (!++depth) goto done;
977 break;
978
979 case Sclose:
980 close1:
981 if (!--depth) goto done;
982 if (depth < min_depth)
983 error ("Containing expression ends prematurely");
984 break;
985
986 case Sstring:
987 stringterm = FETCH_CHAR (from - 1);
988 while (1)
989 {
990 if (from >= stop) goto lose;
991 if (FETCH_CHAR (from) == stringterm) break;
0220c518 992 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
993 {
994 case Scharquote:
995 case Sescape:
996 from++;
997 }
998 from++;
999 }
1000 from++;
1001 if (!depth && sexpflag) goto done;
1002 break;
1003 }
1004 }
1005
1006 /* Reached end of buffer. Error if within object, return nil if between */
1007 if (depth) goto lose;
1008
1009 immediate_quit = 0;
1010 return Qnil;
1011
1012 /* End of object reached */
1013 done:
1014 count--;
1015 }
1016
1017 while (count < 0)
1018 {
1019 stop = BEGV;
1020 while (from > stop)
1021 {
1022 from--;
1023 if (quoted = char_quoted (from))
1024 from--;
1025 c = FETCH_CHAR (from);
1026 code = SYNTAX (c);
7fc8191e
RS
1027 comstyle = 0;
1028 if (code == Sendcomment)
1029 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67
RS
1030 if (from > stop && SYNTAX_COMEND_SECOND (c)
1031 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1032 && !char_quoted (from - 1)
1033 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1034 {
1035 /* we must record the comment style encountered so that
1036 later, we can match only the proper comment begin
1037 sequence of the same style */
1038 code = Sendcomment;
1039 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1040 from--;
1041 }
1042
8489eb67
RS
1043 if (SYNTAX_PREFIX (c))
1044 continue;
1045
0220c518 1046 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
8489eb67
RS
1047 {
1048 case Sword:
1049 case Ssymbol:
1050 if (depth || !sexpflag) break;
1051 /* This word counts as a sexp; count object finished after passing it. */
1052 while (from > stop)
1053 {
1054 quoted = char_quoted (from - 1);
1055 if (quoted)
1056 from--;
e5d4f4dc
RS
1057 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1058 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1059 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
8489eb67
RS
1060 goto done2;
1061 from--;
1062 }
1063 goto done2;
1064
1065 case Smath:
1066 if (!sexpflag)
1067 break;
1068 if (from != stop && c == FETCH_CHAR (from - 1))
1069 from--;
1070 if (mathexit)
1071 {
1072 mathexit = 0;
1073 goto open2;
1074 }
1075 mathexit = 1;
1076
1077 case Sclose:
1078 if (!++depth) goto done2;
1079 break;
1080
1081 case Sopen:
1082 open2:
1083 if (!--depth) goto done2;
1084 if (depth < min_depth)
1085 error ("Containing expression ends prematurely");
1086 break;
1087
1088 case Sendcomment:
1089 if (!parse_sexp_ignore_comments)
1090 break;
8f9dc2ed 1091#if 0
37bef230
RS
1092 if (code != SYNTAX (c))
1093 /* For a two-char comment ender, we can assume
1094 it does end a comment. So scan back in a simple way. */
1095 {
1096 if (from != stop) from--;
1097 while (1)
1098 {
1099 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1100 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1101 break;
c01f7ec0
RS
1102 if (from == stop)
1103 {
1104 if (depth == 0)
1105 goto done2;
1106 goto lose;
1107 }
37bef230
RS
1108 from--;
1109 if (SYNTAX_COMSTART_SECOND (c)
1110 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1111 && SYNTAX_COMMENT_STYLE (c) == comstyle
1112 && !char_quoted (from))
1113 break;
1114 }
1115 break;
1116 }
8f9dc2ed 1117#endif /* 0 */
37bef230 1118
8489eb67
RS
1119 /* Look back, counting the parity of string-quotes,
1120 and recording the comment-starters seen.
1121 When we reach a safe place, assume that's not in a string;
1122 then step the main scan to the earliest comment-starter seen
1123 an even number of string quotes away from the safe place.
1124
1125 OFROM[I] is position of the earliest comment-starter seen
1126 which is I+2X quotes from the comment-end.
1127 PARITY is current parity of quotes from the comment end. */
1128 {
8489eb67 1129 int parity = 0;
e5d4f4dc
RS
1130 char my_stringend = 0;
1131 int string_lossage = 0;
1132 int comment_end = from;
37bef230
RS
1133 int comstart_pos = 0;
1134 int comstart_parity = 0;
4841063f 1135 int scanstart = from - 1;
8489eb67
RS
1136
1137 /* At beginning of range to scan, we're outside of strings;
1138 that determines quote parity to the comment-end. */
1139 while (from != stop)
1140 {
1141 /* Move back and examine a character. */
1142 from--;
1143
1144 c = FETCH_CHAR (from);
1145 code = SYNTAX (c);
1146
1147 /* If this char is the second of a 2-char comment sequence,
1148 back up and give the pair the appropriate syntax. */
1149 if (from > stop && SYNTAX_COMEND_SECOND (c)
1150 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
e5d4f4dc
RS
1151 {
1152 code = Sendcomment;
1153 from--;
4841063f 1154 c = FETCH_CHAR (from);
e5d4f4dc
RS
1155 }
1156
4841063f
RS
1157 /* If this char starts a 2-char comment start sequence,
1158 treat it like a 1-char comment starter. */
1159 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1160 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
1161 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
1162 code = Scomment;
8489eb67
RS
1163
1164 /* Ignore escaped characters. */
1165 if (char_quoted (from))
1166 continue;
1167
37bef230 1168 /* Track parity of quotes. */
8489eb67 1169 if (code == Sstring)
e5d4f4dc
RS
1170 {
1171 parity ^= 1;
1172 if (my_stringend == 0)
1173 my_stringend = c;
37bef230 1174 /* If we have two kinds of string delimiters.
e5d4f4dc
RS
1175 There's no way to grok this scanning backwards. */
1176 else if (my_stringend != c)
1177 string_lossage = 1;
1178 }
8489eb67
RS
1179
1180 /* Record comment-starters according to that
1181 quote-parity to the comment-end. */
1182 if (code == Scomment)
37bef230
RS
1183 {
1184 comstart_parity = parity;
1185 comstart_pos = from;
1186 }
8489eb67 1187
37bef230 1188 /* If we find another earlier comment-ender,
eb8c3be9 1189 any comment-starts earlier than that don't count
37bef230 1190 (because they go with the earlier comment-ender). */
e5d4f4dc
RS
1191 if (code == Sendcomment
1192 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
8489eb67 1193 break;
37bef230
RS
1194
1195 /* Assume a defun-start point is outside of strings. */
1196 if (code == Sopen
1197 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1198 break;
8489eb67 1199 }
37bef230
RS
1200
1201 if (comstart_pos == 0)
1202 from = comment_end;
1203 /* If the earliest comment starter
1204 is followed by uniform paired string quotes or none,
1205 we know it can't be inside a string
1206 since if it were then the comment ender would be inside one.
1207 So it does start a comment. Skip back to it. */
1208 else if (comstart_parity == 0 && !string_lossage)
1209 from = comstart_pos;
1210 else
e5d4f4dc
RS
1211 {
1212 /* We had two kinds of string delimiters mixed up
1213 together. Decode this going forwards.
1214 Scan fwd from the previous comment ender
1215 to the one in question; this records where we
1216 last passed a comment starter. */
1217 struct lisp_parse_state state;
37bef230 1218 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 1219 comment_end - 1, -10000, 0, Qnil, 0);
e5d4f4dc
RS
1220 if (state.incomment)
1221 from = state.comstart;
1222 else
1223 /* We can't grok this as a comment; scan it normally. */
1224 from = comment_end;
1225 }
8489eb67
RS
1226 }
1227 break;
1228
1229 case Sstring:
1230 stringterm = FETCH_CHAR (from);
1231 while (1)
1232 {
1233 if (from == stop) goto lose;
1234 if (!char_quoted (from - 1)
1235 && stringterm == FETCH_CHAR (from - 1))
1236 break;
1237 from--;
1238 }
1239 from--;
1240 if (!depth && sexpflag) goto done2;
1241 break;
1242 }
1243 }
1244
1245 /* Reached start of buffer. Error if within object, return nil if between */
1246 if (depth) goto lose;
1247
1248 immediate_quit = 0;
1249 return Qnil;
1250
1251 done2:
1252 count++;
1253 }
1254
1255
1256 immediate_quit = 0;
1e142fb7 1257 XSETFASTINT (val, from);
8489eb67
RS
1258 return val;
1259
1260 lose:
1261 error ("Unbalanced parentheses");
1262 /* NOTREACHED */
1263}
1264
340f92b5 1265static int
8489eb67
RS
1266char_quoted (pos)
1267 register int pos;
1268{
1269 register enum syntaxcode code;
1270 register int beg = BEGV;
1271 register int quoted = 0;
1272
1273 while (pos > beg
1274 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1275 || code == Sescape))
1276 pos--, quoted = !quoted;
1277 return quoted;
1278}
1279
1280DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1281 "Scan from character number FROM by COUNT lists.\n\
1282Returns the character number of the position thus found.\n\
1283\n\
1284If DEPTH is nonzero, paren depth begins counting from that value,\n\
1285only places where the depth in parentheses becomes zero\n\
1286are candidates for stopping; COUNT such places are counted.\n\
1287Thus, a positive value for DEPTH means go out levels.\n\
1288\n\
1289Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1290\n\
1291If the beginning or end of (the accessible part of) the buffer is reached\n\
1292and the depth is wrong, an error is signaled.\n\
1293If the depth is right but the count is not used up, nil is returned.")
1294 (from, count, depth)
1295 Lisp_Object from, count, depth;
1296{
1297 CHECK_NUMBER (from, 0);
1298 CHECK_NUMBER (count, 1);
1299 CHECK_NUMBER (depth, 2);
1300
1301 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1302}
1303
1304DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1305 "Scan from character number FROM by COUNT balanced expressions.\n\
1306If COUNT is negative, scan backwards.\n\
1307Returns the character number of the position thus found.\n\
1308\n\
1309Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1310\n\
1311If the beginning or end of (the accessible part of) the buffer is reached\n\
1312in the middle of a parenthetical grouping, an error is signaled.\n\
1313If the beginning or end is reached between groupings\n\
1314but before count is used up, nil is returned.")
1315 (from, count)
1316 Lisp_Object from, count;
1317{
1318 CHECK_NUMBER (from, 0);
1319 CHECK_NUMBER (count, 1);
1320
1321 return scan_lists (XINT (from), XINT (count), 0, 1);
1322}
1323
1324DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1325 0, 0, 0,
1326 "Move point backward over any number of chars with prefix syntax.\n\
1327This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1328 ()
1329{
1330 int beg = BEGV;
1331 int pos = point;
1332
1333 while (pos > beg && !char_quoted (pos - 1)
1334 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1335 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1336 pos--;
1337
1338 SET_PT (pos);
1339
1340 return Qnil;
1341}
1342\f
8489eb67 1343/* Parse forward from FROM to END,
e5d4f4dc
RS
1344 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1345 and return a description of the state of the parse at END.
c81a3712
RS
1346 If STOPBEFORE is nonzero, stop at the start of an atom.
1347 If COMMENTSTOP is nonzero, stop at the start of a comment. */
8489eb67 1348
340f92b5 1349static void
c81a3712
RS
1350scan_sexps_forward (stateptr, from, end, targetdepth,
1351 stopbefore, oldstate, commentstop)
e5d4f4dc 1352 struct lisp_parse_state *stateptr;
8489eb67
RS
1353 register int from;
1354 int end, targetdepth, stopbefore;
1355 Lisp_Object oldstate;
c81a3712 1356 int commentstop;
8489eb67
RS
1357{
1358 struct lisp_parse_state state;
1359
1360 register enum syntaxcode code;
1361 struct level { int last, prev; };
1362 struct level levelstart[100];
1363 register struct level *curlevel = levelstart;
1364 struct level *endlevel = levelstart + 100;
1365 char prev;
1366 register int depth; /* Paren depth of current scanning location.
1367 level - levelstart equals this except
1368 when the depth becomes negative. */
1369 int mindepth; /* Lowest DEPTH value seen. */
1370 int start_quoted = 0; /* Nonzero means starting after a char quote */
1371 Lisp_Object tem;
1372
1373 immediate_quit = 1;
1374 QUIT;
1375
265a9e55 1376 if (NILP (oldstate))
8489eb67
RS
1377 {
1378 depth = 0;
1379 state.instring = -1;
1380 state.incomment = 0;
e5d4f4dc 1381 state.comstyle = 0; /* comment style a by default */
8489eb67
RS
1382 }
1383 else
1384 {
1385 tem = Fcar (oldstate);
265a9e55 1386 if (!NILP (tem))
8489eb67
RS
1387 depth = XINT (tem);
1388 else
1389 depth = 0;
1390
1391 oldstate = Fcdr (oldstate);
1392 oldstate = Fcdr (oldstate);
1393 oldstate = Fcdr (oldstate);
1394 tem = Fcar (oldstate);
265a9e55 1395 state.instring = !NILP (tem) ? XINT (tem) : -1;
8489eb67
RS
1396
1397 oldstate = Fcdr (oldstate);
1398 tem = Fcar (oldstate);
265a9e55 1399 state.incomment = !NILP (tem);
8489eb67
RS
1400
1401 oldstate = Fcdr (oldstate);
1402 tem = Fcar (oldstate);
265a9e55 1403 start_quoted = !NILP (tem);
e5d4f4dc
RS
1404
1405 /* if the eight element of the list is nil, we are in comment
1406 style a. if it is non-nil, we are in comment style b */
1407 oldstate = Fcdr (oldstate);
1408 oldstate = Fcdr (oldstate);
e5d4f4dc
RS
1409 tem = Fcar (oldstate);
1410 state.comstyle = !NILP (tem);
8489eb67
RS
1411 }
1412 state.quoted = 0;
1413 mindepth = depth;
1414
1415 curlevel->prev = -1;
1416 curlevel->last = -1;
1417
1418 /* Enter the loop at a place appropriate for initial state. */
1419
1420 if (state.incomment) goto startincomment;
1421 if (state.instring >= 0)
1422 {
1423 if (start_quoted) goto startquotedinstring;
1424 goto startinstring;
1425 }
1426 if (start_quoted) goto startquoted;
1427
1428 while (from < end)
1429 {
e5d4f4dc 1430 code = SYNTAX (FETCH_CHAR (from));
8489eb67 1431 from++;
8f9dc2ed
RS
1432 if (code == Scomment)
1433 state.comstart = from-1;
1434
1435 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1436 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
e5d4f4dc
RS
1437 {
1438 /* Record the comment style we have entered so that only
1439 the comment-end sequence of the same style actually
1440 terminates the comment section. */
1441 code = Scomment;
1442 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
8f9dc2ed 1443 state.comstart = from-1;
e5d4f4dc
RS
1444 from++;
1445 }
1446
8489eb67
RS
1447 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1448 continue;
0220c518 1449 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1450 {
1451 case Sescape:
1452 case Scharquote:
1453 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1454 curlevel->last = from - 1;
1455 startquoted:
1456 if (from == end) goto endquoted;
1457 from++;
1458 goto symstarted;
1459 /* treat following character as a word constituent */
1460 case Sword:
1461 case Ssymbol:
1462 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1463 curlevel->last = from - 1;
1464 symstarted:
1465 while (from < end)
1466 {
0220c518 1467 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1468 {
1469 case Scharquote:
1470 case Sescape:
1471 from++;
1472 if (from == end) goto endquoted;
1473 break;
1474 case Sword:
1475 case Ssymbol:
1476 case Squote:
1477 break;
1478 default:
1479 goto symdone;
1480 }
1481 from++;
1482 }
1483 symdone:
1484 curlevel->prev = curlevel->last;
1485 break;
1486
5a28e48c
RS
1487 startincomment:
1488 if (commentstop)
1489 goto done;
1490 if (from != BEGV)
1491 {
1492 /* Enter the loop in the middle so that we find
1493 a 2-char comment ender if we start in the middle of it. */
1494 prev = FETCH_CHAR (from - 1);
1495 goto startincomment_1;
1496 }
1497 /* At beginning of buffer, enter the loop the ordinary way. */
1498
8489eb67
RS
1499 case Scomment:
1500 state.incomment = 1;
c81a3712
RS
1501 if (commentstop)
1502 goto done;
8489eb67
RS
1503 while (1)
1504 {
1505 if (from == end) goto done;
e5d4f4dc
RS
1506 prev = FETCH_CHAR (from);
1507 if (SYNTAX (prev) == Sendcomment
1508 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1509 /* Only terminate the comment section if the endcomment
1510 of the same style as the start sequence has been
1511 encountered. */
8489eb67
RS
1512 break;
1513 from++;
5a28e48c 1514 startincomment_1:
8489eb67 1515 if (from < end && SYNTAX_COMEND_FIRST (prev)
e5d4f4dc
RS
1516 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1517 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1518 /* Only terminate the comment section if the end-comment
1519 sequence of the same style as the start sequence has
1520 been encountered. */
8489eb67
RS
1521 { from++; break; }
1522 }
1523 state.incomment = 0;
e5d4f4dc 1524 state.comstyle = 0; /* reset the comment style */
8489eb67
RS
1525 break;
1526
1527 case Sopen:
1528 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1529 depth++;
1530 /* curlevel++->last ran into compiler bug on Apollo */
1531 curlevel->last = from - 1;
1532 if (++curlevel == endlevel)
1533 error ("Nesting too deep for parser");
1534 curlevel->prev = -1;
1535 curlevel->last = -1;
1536 if (!--targetdepth) goto done;
1537 break;
1538
1539 case Sclose:
1540 depth--;
1541 if (depth < mindepth)
1542 mindepth = depth;
1543 if (curlevel != levelstart)
1544 curlevel--;
1545 curlevel->prev = curlevel->last;
1546 if (!++targetdepth) goto done;
1547 break;
1548
1549 case Sstring:
1550 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1551 curlevel->last = from - 1;
1552 state.instring = FETCH_CHAR (from - 1);
1553 startinstring:
1554 while (1)
1555 {
1556 if (from >= end) goto done;
1557 if (FETCH_CHAR (from) == state.instring) break;
0220c518 1558 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1559 {
1560 case Scharquote:
1561 case Sescape:
1562 from++;
1563 startquotedinstring:
1564 if (from >= end) goto endquoted;
1565 }
1566 from++;
1567 }
1568 state.instring = -1;
1569 curlevel->prev = curlevel->last;
1570 from++;
1571 break;
1572
1573 case Smath:
1574 break;
1575 }
1576 }
1577 goto done;
1578
1579 stop: /* Here if stopping before start of sexp. */
1580 from--; /* We have just fetched the char that starts it; */
1581 goto done; /* but return the position before it. */
1582
1583 endquoted:
1584 state.quoted = 1;
1585 done:
1586 state.depth = depth;
1587 state.mindepth = mindepth;
1588 state.thislevelstart = curlevel->prev;
1589 state.prevlevelstart
1590 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1591 state.location = from;
1592 immediate_quit = 0;
1593
e5d4f4dc 1594 *stateptr = state;
8489eb67
RS
1595}
1596
1597/* This comment supplies the doc string for parse-partial-sexp,
1598 for make-docfile to see. We cannot put this in the real DEFUN
1599 due to limits in the Unix cpp.
1600
c81a3712 1601DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
1602 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1603Parsing stops at TO or when certain criteria are met;\n\
1604 point is set to where parsing stops.\n\
1605If fifth arg STATE is omitted or nil,\n\
1606 parsing assumes that FROM is the beginning of a function.\n\
e5d4f4dc 1607Value is a list of eight elements describing final state of parsing:\n\
af50f9e5
RS
1608 0. depth in parens.\n\
1609 1. character address of start of innermost containing list; nil if none.\n\
1610 2. character address of start of last complete sexp terminated.\n\
1611 3. non-nil if inside a string.\n\
8489eb67 1612 (it is the character that will terminate the string.)\n\
af50f9e5
RS
1613 4. t if inside a comment.\n\
1614 5. t if following a quote character.\n\
1615 6. the minimum paren-depth encountered during this scan.\n\
1616 7. t if in a comment of style `b'.\n\
8489eb67
RS
1617If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1618in parentheses becomes equal to TARGETDEPTH.\n\
1619Fourth arg STOPBEFORE non-nil means stop when come to\n\
1620 any character that starts a sexp.\n\
ec11639d 1621Fifth arg STATE is an eight-list like what this function returns.\n\
a4275ad1 1622It is used to initialize the state of the parse. Its second and third
c81a3712
RS
1623elements are ignored.
1624Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1625 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
1626*/
1627
c81a3712 1628DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 1629 0 /* See immediately above */)
c81a3712
RS
1630 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1631 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
1632{
1633 struct lisp_parse_state state;
1634 int target;
1635
265a9e55 1636 if (!NILP (targetdepth))
8489eb67
RS
1637 {
1638 CHECK_NUMBER (targetdepth, 3);
1639 target = XINT (targetdepth);
1640 }
1641 else
1642 target = -100000; /* We won't reach this depth */
1643
1644 validate_region (&from, &to);
e5d4f4dc 1645 scan_sexps_forward (&state, XINT (from), XINT (to),
c81a3712
RS
1646 target, !NILP (stopbefore), oldstate,
1647 !NILP (commentstop));
8489eb67
RS
1648
1649 SET_PT (state.location);
1650
1651 return Fcons (make_number (state.depth),
1652 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1653 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1654 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1655 Fcons (state.incomment ? Qt : Qnil,
1656 Fcons (state.quoted ? Qt : Qnil,
e5d4f4dc
RS
1657 Fcons (make_number (state.mindepth),
1658 Fcons (state.comstyle ? Qt : Qnil,
1659 Qnil))))))));
8489eb67
RS
1660}
1661\f
1662init_syntax_once ()
1663{
1664 register int i;
1665 register struct Lisp_Vector *v;
1666
1667 /* Set this now, so first buffer creation can refer to it. */
1668 /* Make it nil before calling copy-syntax-table
1669 so that copy-syntax-table will know not to try to copy from garbage */
1670 Vstandard_syntax_table = Qnil;
1671 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1672
1673 v = XVECTOR (Vstandard_syntax_table);
1674
1675 for (i = 'a'; i <= 'z'; i++)
1e142fb7 1676 XSETFASTINT (v->contents[i], (int) Sword);
8489eb67 1677 for (i = 'A'; i <= 'Z'; i++)
1e142fb7 1678 XSETFASTINT (v->contents[i], (int) Sword);
8489eb67 1679 for (i = '0'; i <= '9'; i++)
1e142fb7
KH
1680 XSETFASTINT (v->contents[i], (int) Sword);
1681 XSETFASTINT (v->contents['$'], (int) Sword);
1682 XSETFASTINT (v->contents['%'], (int) Sword);
1683
1684 XSETFASTINT (v->contents['('], (int) Sopen + (')' << 8));
1685 XSETFASTINT (v->contents[')'], (int) Sclose + ('(' << 8));
1686 XSETFASTINT (v->contents['['], (int) Sopen + (']' << 8));
1687 XSETFASTINT (v->contents[']'], (int) Sclose + ('[' << 8));
1688 XSETFASTINT (v->contents['{'], (int) Sopen + ('}' << 8));
1689 XSETFASTINT (v->contents['}'], (int) Sclose + ('{' << 8));
1690 XSETFASTINT (v->contents['"'], (int) Sstring);
1691 XSETFASTINT (v->contents['\\'], (int) Sescape);
8489eb67
RS
1692
1693 for (i = 0; i < 10; i++)
1e142fb7 1694 XSETFASTINT (v->contents["_-+*/&|<>="[i]], (int) Ssymbol);
8489eb67
RS
1695
1696 for (i = 0; i < 12; i++)
1e142fb7 1697 XSETFASTINT (v->contents[".,;:?!#@~^'`"[i]], (int) Spunct);
8489eb67
RS
1698}
1699
1700syms_of_syntax ()
1701{
1702 Qsyntax_table_p = intern ("syntax-table-p");
1703 staticpro (&Qsyntax_table_p);
1704
1705 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1706 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1707
1708 words_include_escapes = 0;
1709 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1710 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1711
1712 defsubr (&Ssyntax_table_p);
1713 defsubr (&Ssyntax_table);
1714 defsubr (&Sstandard_syntax_table);
1715 defsubr (&Scopy_syntax_table);
1716 defsubr (&Sset_syntax_table);
1717 defsubr (&Schar_syntax);
beefa22e 1718 defsubr (&Smatching_paren);
8489eb67
RS
1719 defsubr (&Smodify_syntax_entry);
1720 defsubr (&Sdescribe_syntax);
1721
1722 defsubr (&Sforward_word);
1723
b3cfe0c8 1724 defsubr (&Sforward_comment);
8489eb67
RS
1725 defsubr (&Sscan_lists);
1726 defsubr (&Sscan_sexps);
1727 defsubr (&Sbackward_prefix_chars);
1728 defsubr (&Sparse_partial_sexp);
1729}