Use char tables as syntax tables.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
0220c518 2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
18160b98 21#include <config.h>
8489eb67
RS
22#include <ctype.h>
23#include "lisp.h"
24#include "commands.h"
25#include "buffer.h"
26#include "syntax.h"
27
28Lisp_Object Qsyntax_table_p;
29
340f92b5
RS
30static void scan_sexps_forward ();
31static int char_quoted ();
32
8489eb67
RS
33int words_include_escapes;
34
e5d4f4dc
RS
35/* This is the internal form of the parse state used in parse-partial-sexp. */
36
37struct lisp_parse_state
38 {
39 int depth; /* Depth at end of parsing */
40 int instring; /* -1 if not within string, else desired terminator. */
41 int incomment; /* Nonzero if within a comment at end of parsing */
42 int comstyle; /* comment style a=0, or b=1 */
43 int quoted; /* Nonzero if just after an escape char at end of parsing */
44 int thislevelstart; /* Char number of most recent start-of-expression at current level */
45 int prevlevelstart; /* Char number of start of containing expression */
46 int location; /* Char number at which parsing stopped. */
47 int mindepth; /* Minimum depth seen while scanning. */
48 int comstart; /* Position just after last comment starter. */
49 };
50\f
37bef230
RS
51/* These variables are a cache for finding the start of a defun.
52 find_start_pos is the place for which the defun start was found.
53 find_start_value is the defun start position found for it.
54 find_start_buffer is the buffer it was found in.
55 find_start_begv is the BEGV value when it was found.
56 find_start_modiff is the value of MODIFF when it was found. */
57
58static int find_start_pos;
59static int find_start_value;
60static struct buffer *find_start_buffer;
61static int find_start_begv;
62static int find_start_modiff;
63
64/* Find a defun-start that is the last one before POS (or nearly the last).
65 We record what we find, so that another call in the same area
66 can return the same value right away. */
67
68static int
69find_defun_start (pos)
70 int pos;
71{
72 int tem;
73 int shortage;
74
75 /* Use previous finding, if it's valid and applies to this inquiry. */
76 if (current_buffer == find_start_buffer
77 /* Reuse the defun-start even if POS is a little farther on.
78 POS might be in the next defun, but that's ok.
79 Our value may not be the best possible, but will still be usable. */
80 && pos <= find_start_pos + 1000
81 && pos >= find_start_value
82 && BEGV == find_start_begv
83 && MODIFF == find_start_modiff)
84 return find_start_value;
85
86 /* Back up to start of line. */
be5357e2 87 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
37bef230
RS
88
89 while (tem > BEGV)
90 {
91 /* Open-paren at start of line means we found our defun-start. */
92 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
93 break;
94 /* Move to beg of previous line. */
be5357e2 95 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
37bef230
RS
96 }
97
98 /* Record what we found, for the next try. */
99 find_start_value = tem;
100 find_start_buffer = current_buffer;
101 find_start_modiff = MODIFF;
102 find_start_begv = BEGV;
103 find_start_pos = pos;
104
105 return find_start_value;
106}
107\f
8489eb67
RS
108DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
109 "Return t if ARG is a syntax table.\n\
110Any vector of 256 elements will do.")
111 (obj)
112 Lisp_Object obj;
113{
330cf232 114 if (VECTORP (obj) && XVECTOR (obj)->size == 0400)
8489eb67
RS
115 return Qt;
116 return Qnil;
117}
118
119Lisp_Object
120check_syntax_table (obj)
121 Lisp_Object obj;
122{
123 register Lisp_Object tem;
124 while (tem = Fsyntax_table_p (obj),
265a9e55 125 NILP (tem))
b3cfe0c8 126 obj = wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
127 return obj;
128}
129
130
131DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
132 "Return the current syntax table.\n\
133This is the one specified by the current buffer.")
134 ()
135{
136 return current_buffer->syntax_table;
137}
138
139DEFUN ("standard-syntax-table", Fstandard_syntax_table,
140 Sstandard_syntax_table, 0, 0, 0,
141 "Return the standard syntax table.\n\
142This is the one used for new buffers.")
143 ()
144{
145 return Vstandard_syntax_table;
146}
147
148DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
149 "Construct a new syntax table and return it.\n\
150It is a copy of the TABLE, which defaults to the standard syntax table.")
151 (table)
152 Lisp_Object table;
153{
154 Lisp_Object size, val;
1e142fb7
KH
155 XSETFASTINT (size, 0400);
156 XSETFASTINT (val, 0);
8489eb67 157 val = Fmake_vector (size, val);
265a9e55 158 if (!NILP (table))
8489eb67 159 table = check_syntax_table (table);
265a9e55 160 else if (NILP (Vstandard_syntax_table))
8489eb67
RS
161 /* Can only be null during initialization */
162 return val;
163 else table = Vstandard_syntax_table;
164
165 bcopy (XVECTOR (table)->contents,
166 XVECTOR (val)->contents, 0400 * sizeof (Lisp_Object));
167 return val;
168}
169
170DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
171 "Select a new syntax table for the current buffer.\n\
172One argument, a syntax table.")
173 (table)
174 Lisp_Object table;
175{
176 table = check_syntax_table (table);
177 current_buffer->syntax_table = table;
178 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
179 current_buffer->local_var_flags
180 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
181 return table;
182}
183\f
184/* Convert a letter which signifies a syntax code
185 into the code it signifies.
186 This is used by modify-syntax-entry, and other things. */
187
188unsigned char syntax_spec_code[0400] =
189 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
193 (char) Swhitespace, 0377, (char) Sstring, 0377,
194 (char) Smath, 0377, 0377, (char) Squote,
195 (char) Sopen, (char) Sclose, 0377, 0377,
196 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
197 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
198 0377, 0377, 0377, 0377,
199 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 200 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
203 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
207 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
208 };
209
210/* Indexed by syntax code, give the letter that describes it. */
211
6cb71bf6 212char syntax_code_spec[14] =
8489eb67 213 {
6cb71bf6 214 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
8489eb67
RS
215 };
216\f
217DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
218 "Return the syntax code of CHAR, described by a character.\n\
219For example, if CHAR is a word constituent, the character `?w' is returned.\n\
220The characters that correspond to various syntax codes\n\
221are listed in the documentation of `modify-syntax-entry'.")
222 (ch)
223 Lisp_Object ch;
224{
225 CHECK_NUMBER (ch, 0);
beefa22e
RS
226 return make_number (syntax_code_spec[(int) SYNTAX (XINT (ch))]);
227}
228
229DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
230 "Return the matching parenthesis of CHAR, or nil if none.")
231 (ch)
232 Lisp_Object ch;
233{
234 int code;
235 CHECK_NUMBER (ch, 0);
236 code = SYNTAX (XINT (ch));
a8bd7cd8 237 if (code == Sopen || code == Sclose)
beefa22e
RS
238 return make_number (SYNTAX_MATCH (XINT (ch)));
239 return Qnil;
8489eb67
RS
240}
241
242/* This comment supplies the doc string for modify-syntax-entry,
243 for make-docfile to see. We cannot put this in the real DEFUN
244 due to limits in the Unix cpp.
245
2fcea188 246DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
247 "Set syntax for character CHAR according to string S.\n\
248The syntax is changed only for table TABLE, which defaults to\n\
249 the current buffer's syntax table.\n\
250The first character of S should be one of the following:\n\
32676c08
JB
251 Space or - whitespace syntax. w word constituent.\n\
252 _ symbol constituent. . punctuation.\n\
253 ( open-parenthesis. ) close-parenthesis.\n\
254 \" string quote. \\ escape.\n\
255 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 256 < comment starter. > comment ender.\n\
6cb71bf6 257 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 258\n\
8489eb67
RS
259Only single-character comment start and end sequences are represented thus.\n\
260Two-character sequences are represented as described below.\n\
261The second character of S is the matching parenthesis,\n\
262 used only if the first character is `(' or `)'.\n\
263Any additional characters are flags.\n\
e5d4f4dc 264Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
8489eb67
RS
265 1 means C is the start of a two-char comment start sequence.\n\
266 2 means C is the second character of such a sequence.\n\
267 3 means C is the start of a two-char comment end sequence.\n\
268 4 means C is the second character of such a sequence.\n\
e5d4f4dc
RS
269\n\
270There can be up to two orthogonal comment sequences. This is to support\n\
271language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
272a, but you can set the comment sequence style to b (on the second character\n\
273of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc
RS
274this flag:\n\
275 b means C is part of comment sequence b.\n\
276\n\
277 p means C is a prefix character for `backward-prefix-chars';\n\
278 such characters are treated as whitespace when they occur\n\
8489eb67 279 between expressions.")
2fcea188 280 (char, s, table)
8489eb67
RS
281*/
282
283DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
284 /* I really don't know why this is interactive
285 help-form should at least be made useful whilst reading the second arg
286 */
287 "cSet syntax for character: \nsSet syntax for %s to: ",
288 0 /* See immediately above */)
289 (c, newentry, syntax_table)
290 Lisp_Object c, newentry, syntax_table;
291{
292 register unsigned char *p, match;
293 register enum syntaxcode code;
247e20a8 294 int val;
8489eb67
RS
295
296 CHECK_NUMBER (c, 0);
297 CHECK_STRING (newentry, 1);
265a9e55 298 if (NILP (syntax_table))
8489eb67
RS
299 syntax_table = current_buffer->syntax_table;
300 else
301 syntax_table = check_syntax_table (syntax_table);
302
303 p = XSTRING (newentry)->data;
304 code = (enum syntaxcode) syntax_spec_code[*p++];
305 if (((int) code & 0377) == 0377)
306 error ("invalid syntax description letter: %c", c);
307
308 match = *p;
309 if (match) p++;
310 if (match == ' ') match = 0;
311
247e20a8 312 val = (match << 8) + (int) code;
8489eb67
RS
313 while (*p)
314 switch (*p++)
315 {
316 case '1':
247e20a8 317 val |= 1 << 16;
8489eb67
RS
318 break;
319
320 case '2':
247e20a8 321 val |= 1 << 17;
8489eb67
RS
322 break;
323
324 case '3':
247e20a8 325 val |= 1 << 18;
8489eb67
RS
326 break;
327
328 case '4':
247e20a8 329 val |= 1 << 19;
8489eb67
RS
330 break;
331
332 case 'p':
247e20a8 333 val |= 1 << 20;
8489eb67 334 break;
e5d4f4dc
RS
335
336 case 'b':
247e20a8 337 val |= 1 << 21;
e5d4f4dc 338 break;
8489eb67
RS
339 }
340
247e20a8 341 XSETFASTINT (XVECTOR (syntax_table)->contents[0xFF & XINT (c)], val);
8489eb67
RS
342
343 return Qnil;
344}
345\f
346/* Dump syntax table to buffer in human-readable format */
347
340f92b5 348static void
8489eb67
RS
349describe_syntax (value)
350 Lisp_Object value;
351{
352 register enum syntaxcode code;
e5d4f4dc 353 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67
RS
354 char str[2];
355
356 Findent_to (make_number (16), make_number (1));
357
330cf232 358 if (!INTEGERP (value))
8489eb67
RS
359 {
360 insert_string ("invalid");
361 return;
362 }
363
364 code = (enum syntaxcode) (XINT (value) & 0377);
365 match = (XINT (value) >> 8) & 0377;
366 start1 = (XINT (value) >> 16) & 1;
367 start2 = (XINT (value) >> 17) & 1;
368 end1 = (XINT (value) >> 18) & 1;
369 end2 = (XINT (value) >> 19) & 1;
370 prefix = (XINT (value) >> 20) & 1;
e5d4f4dc 371 comstyle = (XINT (value) >> 21) & 1;
8489eb67
RS
372
373 if ((int) code < 0 || (int) code >= (int) Smax)
374 {
375 insert_string ("invalid");
376 return;
377 }
378 desc = syntax_code_spec[(int) code];
379
380 str[0] = desc, str[1] = 0;
381 insert (str, 1);
382
383 str[0] = match ? match : ' ';
384 insert (str, 1);
385
386
387 if (start1)
388 insert ("1", 1);
389 if (start2)
390 insert ("2", 1);
391
392 if (end1)
393 insert ("3", 1);
394 if (end2)
395 insert ("4", 1);
396
397 if (prefix)
398 insert ("p", 1);
e5d4f4dc
RS
399 if (comstyle)
400 insert ("b", 1);
8489eb67
RS
401
402 insert_string ("\twhich means: ");
403
0220c518 404 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
405 {
406 case Swhitespace:
407 insert_string ("whitespace"); break;
408 case Spunct:
409 insert_string ("punctuation"); break;
410 case Sword:
411 insert_string ("word"); break;
412 case Ssymbol:
413 insert_string ("symbol"); break;
414 case Sopen:
415 insert_string ("open"); break;
416 case Sclose:
417 insert_string ("close"); break;
418 case Squote:
419 insert_string ("quote"); break;
420 case Sstring:
421 insert_string ("string"); break;
422 case Smath:
423 insert_string ("math"); break;
424 case Sescape:
425 insert_string ("escape"); break;
426 case Scharquote:
427 insert_string ("charquote"); break;
428 case Scomment:
429 insert_string ("comment"); break;
430 case Sendcomment:
431 insert_string ("endcomment"); break;
6cb71bf6
RS
432 case Sinherit:
433 insert_string ("inherit"); break;
8489eb67
RS
434 default:
435 insert_string ("invalid");
436 return;
437 }
438
439 if (match)
440 {
441 insert_string (", matches ");
410e16ff 442 insert_char (match);
8489eb67
RS
443 }
444
445 if (start1)
446 insert_string (",\n\t is the first character of a comment-start sequence");
447 if (start2)
448 insert_string (",\n\t is the second character of a comment-start sequence");
449
450 if (end1)
451 insert_string (",\n\t is the first character of a comment-end sequence");
452 if (end2)
453 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
454 if (comstyle)
455 insert_string (" (comment style b)");
456
8489eb67
RS
457 if (prefix)
458 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
459
460 insert_string ("\n");
461}
462
340f92b5 463static Lisp_Object
8489eb67
RS
464describe_syntax_1 (vector)
465 Lisp_Object vector;
466{
467 struct buffer *old = current_buffer;
468 set_buffer_internal (XBUFFER (Vstandard_output));
5588734e 469 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
36cd82fe 470 call0 (intern ("help-mode"));
8489eb67
RS
471 set_buffer_internal (old);
472 return Qnil;
473}
474
475DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
476 "Describe the syntax specifications in the syntax table.\n\
477The descriptions are inserted in a buffer, which is then displayed.")
478 ()
479{
480 internal_with_output_to_temp_buffer
481 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
482
483 return Qnil;
484}
485\f
486/* Return the position across COUNT words from FROM.
487 If that many words cannot be found before the end of the buffer, return 0.
488 COUNT negative means scan backward and stop at word beginning. */
489
490scan_words (from, count)
491 register int from, count;
492{
493 register int beg = BEGV;
494 register int end = ZV;
495 register int code;
496
497 immediate_quit = 1;
498 QUIT;
499
500 while (count > 0)
501 {
502 while (1)
503 {
504 if (from == end)
505 {
506 immediate_quit = 0;
507 return 0;
508 }
509 code = SYNTAX (FETCH_CHAR (from));
510 if (words_include_escapes
511 && (code == Sescape || code == Scharquote))
512 break;
513 if (code == Sword)
514 break;
515 from++;
516 }
517 while (1)
518 {
519 if (from == end) break;
520 code = SYNTAX (FETCH_CHAR (from));
521 if (!(words_include_escapes
522 && (code == Sescape || code == Scharquote)))
523 if (code != Sword)
524 break;
525 from++;
526 }
527 count--;
528 }
529 while (count < 0)
530 {
531 while (1)
532 {
533 if (from == beg)
534 {
535 immediate_quit = 0;
536 return 0;
537 }
538 code = SYNTAX (FETCH_CHAR (from - 1));
539 if (words_include_escapes
540 && (code == Sescape || code == Scharquote))
541 break;
542 if (code == Sword)
543 break;
544 from--;
545 }
546 while (1)
547 {
548 if (from == beg) break;
549 code = SYNTAX (FETCH_CHAR (from - 1));
550 if (!(words_include_escapes
551 && (code == Sescape || code == Scharquote)))
552 if (code != Sword)
553 break;
554 from--;
555 }
556 count++;
557 }
558
559 immediate_quit = 0;
560
561 return from;
562}
563
564DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
565 "Move point forward ARG words (backward if ARG is negative).\n\
566Normally returns t.\n\
567If an edge of the buffer is reached, point is left there\n\
568and nil is returned.")
569 (count)
570 Lisp_Object count;
571{
572 int val;
573 CHECK_NUMBER (count, 0);
574
575 if (!(val = scan_words (point, XINT (count))))
576 {
577 SET_PT (XINT (count) > 0 ? ZV : BEGV);
578 return Qnil;
579 }
580 SET_PT (val);
581 return Qt;
582}
583\f
b3cfe0c8
RS
584DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
585 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 586Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 587Set point to where scanning stops.\n\
b3cfe0c8
RS
588If N comments are found as expected, with nothing except whitespace\n\
589between them, return t; otherwise return nil.")
590 (count)
840f481c 591 Lisp_Object count;
b3cfe0c8
RS
592{
593 register int from;
594 register int stop;
595 register int c;
596 register enum syntaxcode code;
597 int comstyle = 0; /* style of comment encountered */
be720845 598 int found;
840f481c
RS
599 int count1;
600
601 CHECK_NUMBER (count, 0);
602 count1 = XINT (count);
b3cfe0c8
RS
603
604 immediate_quit = 1;
605 QUIT;
606
607 from = PT;
608
840f481c 609 while (count1 > 0)
b3cfe0c8
RS
610 {
611 stop = ZV;
04882296 612 do
b3cfe0c8 613 {
04882296
KH
614 if (from == stop)
615 {
616 SET_PT (from);
b7e6e612 617 immediate_quit = 0;
04882296
KH
618 return Qnil;
619 }
b3cfe0c8
RS
620 c = FETCH_CHAR (from);
621 code = SYNTAX (c);
622 from++;
623 comstyle = 0;
624 if (from < stop && SYNTAX_COMSTART_FIRST (c)
625 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
626 {
7fc8191e
RS
627 /* We have encountered a comment start sequence and we
628 are ignoring all text inside comments. We must record
b3cfe0c8
RS
629 the comment style this sequence begins so that later,
630 only a comment end of the same style actually ends
7fc8191e 631 the comment section. */
b3cfe0c8
RS
632 code = Scomment;
633 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
634 from++;
635 }
04882296
KH
636 }
637 while (code == Swhitespace || code == Sendcomment);
638 if (code != Scomment)
639 {
640 immediate_quit = 0;
641 SET_PT (from - 1);
642 return Qnil;
643 }
644 /* We're at the start of a comment. */
645 while (1)
646 {
647 if (from == stop)
b3cfe0c8
RS
648 {
649 immediate_quit = 0;
04882296 650 SET_PT (from);
b3cfe0c8
RS
651 return Qnil;
652 }
04882296 653 c = FETCH_CHAR (from);
2eb1d6a2 654 from++;
04882296
KH
655 if (SYNTAX (c) == Sendcomment
656 && SYNTAX_COMMENT_STYLE (c) == comstyle)
657 /* we have encountered a comment end of the same style
658 as the comment sequence which began this comment
659 section */
660 break;
04882296
KH
661 if (from < stop && SYNTAX_COMEND_FIRST (c)
662 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
663 && SYNTAX_COMMENT_STYLE (c) == comstyle)
664 /* we have encountered a comment end of the same style
665 as the comment sequence which began this comment
666 section */
667 { from++; break; }
b3cfe0c8 668 }
04882296 669 /* We have skipped one comment. */
840f481c 670 count1--;
b3cfe0c8
RS
671 }
672
840f481c 673 while (count1 < 0)
b3cfe0c8
RS
674 {
675 stop = BEGV;
676 while (from > stop)
677 {
678 int quoted;
679
680 from--;
681 quoted = char_quoted (from);
682 if (quoted)
683 from--;
684 c = FETCH_CHAR (from);
685 code = SYNTAX (c);
686 comstyle = 0;
7fc8191e
RS
687 if (code == Sendcomment)
688 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8
RS
689 if (from > stop && SYNTAX_COMEND_SECOND (c)
690 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
691 && !char_quoted (from - 1))
692 {
7fc8191e 693 /* We must record the comment style encountered so that
b3cfe0c8 694 later, we can match only the proper comment begin
7fc8191e 695 sequence of the same style. */
b3cfe0c8
RS
696 code = Sendcomment;
697 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
698 from--;
699 }
700
701 if (code == Sendcomment && !quoted)
702 {
8f9dc2ed 703#if 0
b3cfe0c8
RS
704 if (code != SYNTAX (c))
705 /* For a two-char comment ender, we can assume
706 it does end a comment. So scan back in a simple way. */
707 {
708 if (from != stop) from--;
709 while (1)
710 {
711 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
712 && SYNTAX_COMMENT_STYLE (c) == comstyle)
713 break;
714 if (from == stop)
715 {
716 immediate_quit = 0;
c81a3712 717 SET_PT (from);
b3cfe0c8
RS
718 return Qnil;
719 }
720 from--;
721 if (SYNTAX_COMSTART_SECOND (c)
722 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
723 && SYNTAX_COMMENT_STYLE (c) == comstyle
724 && !char_quoted (from))
725 break;
726 }
727 break;
728 }
8f9dc2ed 729#endif /* 0 */
b3cfe0c8
RS
730
731 /* Look back, counting the parity of string-quotes,
732 and recording the comment-starters seen.
733 When we reach a safe place, assume that's not in a string;
734 then step the main scan to the earliest comment-starter seen
735 an even number of string quotes away from the safe place.
736
737 OFROM[I] is position of the earliest comment-starter seen
738 which is I+2X quotes from the comment-end.
739 PARITY is current parity of quotes from the comment end. */
740 {
741 int parity = 0;
742 char my_stringend = 0;
743 int string_lossage = 0;
744 int comment_end = from;
745 int comstart_pos = 0;
746 int comstart_parity = 0;
4841063f 747 int scanstart = from - 1;
b3cfe0c8
RS
748
749 /* At beginning of range to scan, we're outside of strings;
750 that determines quote parity to the comment-end. */
751 while (from != stop)
752 {
753 /* Move back and examine a character. */
754 from--;
755
756 c = FETCH_CHAR (from);
757 code = SYNTAX (c);
758
759 /* If this char is the second of a 2-char comment sequence,
760 back up and give the pair the appropriate syntax. */
761 if (from > stop && SYNTAX_COMEND_SECOND (c)
762 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
763 {
764 code = Sendcomment;
765 from--;
4841063f 766 c = FETCH_CHAR (from);
b3cfe0c8
RS
767 }
768
4841063f
RS
769 /* If this char starts a 2-char comment start sequence,
770 treat it like a 1-char comment starter. */
771 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
772 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
773 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
774 code = Scomment;
b3cfe0c8
RS
775
776 /* Ignore escaped characters. */
777 if (char_quoted (from))
778 continue;
779
780 /* Track parity of quotes. */
781 if (code == Sstring)
782 {
783 parity ^= 1;
784 if (my_stringend == 0)
785 my_stringend = c;
786 /* If we have two kinds of string delimiters.
787 There's no way to grok this scanning backwards. */
788 else if (my_stringend != c)
789 string_lossage = 1;
790 }
791
792 /* Record comment-starters according to that
793 quote-parity to the comment-end. */
794 if (code == Scomment)
795 {
796 comstart_parity = parity;
797 comstart_pos = from;
798 }
799
800 /* If we find another earlier comment-ender,
eb8c3be9 801 any comment-starts earlier than that don't count
b3cfe0c8
RS
802 (because they go with the earlier comment-ender). */
803 if (code == Sendcomment
804 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
805 break;
806
807 /* Assume a defun-start point is outside of strings. */
808 if (code == Sopen
809 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
810 break;
811 }
812
813 if (comstart_pos == 0)
814 from = comment_end;
815 /* If the earliest comment starter
816 is followed by uniform paired string quotes or none,
817 we know it can't be inside a string
818 since if it were then the comment ender would be inside one.
819 So it does start a comment. Skip back to it. */
820 else if (comstart_parity == 0 && !string_lossage)
821 from = comstart_pos;
822 else
823 {
824 /* We had two kinds of string delimiters mixed up
825 together. Decode this going forwards.
826 Scan fwd from the previous comment ender
827 to the one in question; this records where we
828 last passed a comment starter. */
829 struct lisp_parse_state state;
830 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 831 comment_end - 1, -10000, 0, Qnil, 0);
b3cfe0c8
RS
832 if (state.incomment)
833 from = state.comstart;
834 else
835 /* We can't grok this as a comment; scan it normally. */
836 from = comment_end;
837 }
838 }
7fc8191e
RS
839 /* We have skipped one comment. */
840 break;
b3cfe0c8 841 }
c81a3712 842 else if ((code != Swhitespace && code != Scomment) || quoted)
b3cfe0c8
RS
843 {
844 immediate_quit = 0;
c81a3712 845 SET_PT (from + 1);
b3cfe0c8
RS
846 return Qnil;
847 }
848 }
849
840f481c 850 count1++;
b3cfe0c8
RS
851 }
852
853 SET_PT (from);
854 immediate_quit = 0;
855 return Qt;
856}
857\f
8489eb67
RS
858int parse_sexp_ignore_comments;
859
860Lisp_Object
861scan_lists (from, count, depth, sexpflag)
862 register int from;
863 int count, depth, sexpflag;
864{
865 Lisp_Object val;
866 register int stop;
867 register int c;
ee384542 868 unsigned char stringterm;
8489eb67
RS
869 int quoted;
870 int mathexit = 0;
871 register enum syntaxcode code;
872 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 873 int comstyle = 0; /* style of comment encountered */
8489eb67
RS
874
875 if (depth > 0) min_depth = 0;
876
877 immediate_quit = 1;
878 QUIT;
879
880 while (count > 0)
881 {
882 stop = ZV;
883 while (from < stop)
884 {
885 c = FETCH_CHAR (from);
e5d4f4dc 886 code = SYNTAX (c);
8489eb67
RS
887 from++;
888 if (from < stop && SYNTAX_COMSTART_FIRST (c)
889 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
890 && parse_sexp_ignore_comments)
e5d4f4dc
RS
891 {
892 /* we have encountered a comment start sequence and we
893 are ignoring all text inside comments. we must record
894 the comment style this sequence begins so that later,
895 only a comment end of the same style actually ends
896 the comment section */
897 code = Scomment;
898 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
899 from++;
900 }
901
8489eb67
RS
902 if (SYNTAX_PREFIX (c))
903 continue;
904
0220c518 905 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
906 {
907 case Sescape:
908 case Scharquote:
909 if (from == stop) goto lose;
910 from++;
911 /* treat following character as a word constituent */
912 case Sword:
913 case Ssymbol:
914 if (depth || !sexpflag) break;
915 /* This word counts as a sexp; return at end of it. */
916 while (from < stop)
917 {
0220c518 918 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
919 {
920 case Scharquote:
921 case Sescape:
922 from++;
923 if (from == stop) goto lose;
924 break;
925 case Sword:
926 case Ssymbol:
927 case Squote:
928 break;
929 default:
930 goto done;
931 }
932 from++;
933 }
934 goto done;
935
936 case Scomment:
937 if (!parse_sexp_ignore_comments) break;
938 while (1)
939 {
c01f7ec0
RS
940 if (from == stop)
941 {
942 if (depth == 0)
943 goto done;
944 goto lose;
945 }
e5d4f4dc
RS
946 c = FETCH_CHAR (from);
947 if (SYNTAX (c) == Sendcomment
948 && SYNTAX_COMMENT_STYLE (c) == comstyle)
949 /* we have encountered a comment end of the same style
950 as the comment sequence which began this comment
951 section */
8489eb67
RS
952 break;
953 from++;
954 if (from < stop && SYNTAX_COMEND_FIRST (c)
e5d4f4dc
RS
955 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
956 && SYNTAX_COMMENT_STYLE (c) == comstyle)
957 /* we have encountered a comment end of the same style
958 as the comment sequence which began this comment
959 section */
8489eb67
RS
960 { from++; break; }
961 }
962 break;
963
964 case Smath:
965 if (!sexpflag)
966 break;
967 if (from != stop && c == FETCH_CHAR (from))
968 from++;
969 if (mathexit)
970 {
971 mathexit = 0;
972 goto close1;
973 }
974 mathexit = 1;
975
976 case Sopen:
977 if (!++depth) goto done;
978 break;
979
980 case Sclose:
981 close1:
982 if (!--depth) goto done;
983 if (depth < min_depth)
984 error ("Containing expression ends prematurely");
985 break;
986
987 case Sstring:
988 stringterm = FETCH_CHAR (from - 1);
989 while (1)
990 {
991 if (from >= stop) goto lose;
992 if (FETCH_CHAR (from) == stringterm) break;
0220c518 993 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
994 {
995 case Scharquote:
996 case Sescape:
997 from++;
998 }
999 from++;
1000 }
1001 from++;
1002 if (!depth && sexpflag) goto done;
1003 break;
1004 }
1005 }
1006
1007 /* Reached end of buffer. Error if within object, return nil if between */
1008 if (depth) goto lose;
1009
1010 immediate_quit = 0;
1011 return Qnil;
1012
1013 /* End of object reached */
1014 done:
1015 count--;
1016 }
1017
1018 while (count < 0)
1019 {
1020 stop = BEGV;
1021 while (from > stop)
1022 {
1023 from--;
1024 if (quoted = char_quoted (from))
1025 from--;
1026 c = FETCH_CHAR (from);
1027 code = SYNTAX (c);
7fc8191e
RS
1028 comstyle = 0;
1029 if (code == Sendcomment)
1030 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67
RS
1031 if (from > stop && SYNTAX_COMEND_SECOND (c)
1032 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1))
1033 && !char_quoted (from - 1)
1034 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1035 {
1036 /* we must record the comment style encountered so that
1037 later, we can match only the proper comment begin
1038 sequence of the same style */
1039 code = Sendcomment;
1040 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from - 1));
1041 from--;
1042 }
1043
8489eb67
RS
1044 if (SYNTAX_PREFIX (c))
1045 continue;
1046
0220c518 1047 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
8489eb67
RS
1048 {
1049 case Sword:
1050 case Ssymbol:
1051 if (depth || !sexpflag) break;
1052 /* This word counts as a sexp; count object finished after passing it. */
1053 while (from > stop)
1054 {
1055 quoted = char_quoted (from - 1);
1056 if (quoted)
1057 from--;
e5d4f4dc
RS
1058 if (! (quoted || SYNTAX (FETCH_CHAR (from - 1)) == Sword
1059 || SYNTAX (FETCH_CHAR (from - 1)) == Ssymbol
1060 || SYNTAX (FETCH_CHAR (from - 1)) == Squote))
8489eb67
RS
1061 goto done2;
1062 from--;
1063 }
1064 goto done2;
1065
1066 case Smath:
1067 if (!sexpflag)
1068 break;
1069 if (from != stop && c == FETCH_CHAR (from - 1))
1070 from--;
1071 if (mathexit)
1072 {
1073 mathexit = 0;
1074 goto open2;
1075 }
1076 mathexit = 1;
1077
1078 case Sclose:
1079 if (!++depth) goto done2;
1080 break;
1081
1082 case Sopen:
1083 open2:
1084 if (!--depth) goto done2;
1085 if (depth < min_depth)
1086 error ("Containing expression ends prematurely");
1087 break;
1088
1089 case Sendcomment:
1090 if (!parse_sexp_ignore_comments)
1091 break;
8f9dc2ed 1092#if 0
37bef230
RS
1093 if (code != SYNTAX (c))
1094 /* For a two-char comment ender, we can assume
1095 it does end a comment. So scan back in a simple way. */
1096 {
1097 if (from != stop) from--;
1098 while (1)
1099 {
1100 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1101 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1102 break;
c01f7ec0
RS
1103 if (from == stop)
1104 {
1105 if (depth == 0)
1106 goto done2;
1107 goto lose;
1108 }
37bef230
RS
1109 from--;
1110 if (SYNTAX_COMSTART_SECOND (c)
1111 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1112 && SYNTAX_COMMENT_STYLE (c) == comstyle
1113 && !char_quoted (from))
1114 break;
1115 }
1116 break;
1117 }
8f9dc2ed 1118#endif /* 0 */
37bef230 1119
8489eb67
RS
1120 /* Look back, counting the parity of string-quotes,
1121 and recording the comment-starters seen.
1122 When we reach a safe place, assume that's not in a string;
1123 then step the main scan to the earliest comment-starter seen
1124 an even number of string quotes away from the safe place.
1125
1126 OFROM[I] is position of the earliest comment-starter seen
1127 which is I+2X quotes from the comment-end.
1128 PARITY is current parity of quotes from the comment end. */
1129 {
8489eb67 1130 int parity = 0;
e5d4f4dc
RS
1131 char my_stringend = 0;
1132 int string_lossage = 0;
1133 int comment_end = from;
37bef230
RS
1134 int comstart_pos = 0;
1135 int comstart_parity = 0;
4841063f 1136 int scanstart = from - 1;
8489eb67
RS
1137
1138 /* At beginning of range to scan, we're outside of strings;
1139 that determines quote parity to the comment-end. */
1140 while (from != stop)
1141 {
1142 /* Move back and examine a character. */
1143 from--;
1144
1145 c = FETCH_CHAR (from);
1146 code = SYNTAX (c);
1147
1148 /* If this char is the second of a 2-char comment sequence,
1149 back up and give the pair the appropriate syntax. */
1150 if (from > stop && SYNTAX_COMEND_SECOND (c)
1151 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
e5d4f4dc
RS
1152 {
1153 code = Sendcomment;
1154 from--;
4841063f 1155 c = FETCH_CHAR (from);
e5d4f4dc
RS
1156 }
1157
4841063f
RS
1158 /* If this char starts a 2-char comment start sequence,
1159 treat it like a 1-char comment starter. */
1160 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1161 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
1162 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
1163 code = Scomment;
8489eb67
RS
1164
1165 /* Ignore escaped characters. */
1166 if (char_quoted (from))
1167 continue;
1168
37bef230 1169 /* Track parity of quotes. */
8489eb67 1170 if (code == Sstring)
e5d4f4dc
RS
1171 {
1172 parity ^= 1;
1173 if (my_stringend == 0)
1174 my_stringend = c;
37bef230 1175 /* If we have two kinds of string delimiters.
e5d4f4dc
RS
1176 There's no way to grok this scanning backwards. */
1177 else if (my_stringend != c)
1178 string_lossage = 1;
1179 }
8489eb67
RS
1180
1181 /* Record comment-starters according to that
1182 quote-parity to the comment-end. */
1183 if (code == Scomment)
37bef230
RS
1184 {
1185 comstart_parity = parity;
1186 comstart_pos = from;
1187 }
8489eb67 1188
37bef230 1189 /* If we find another earlier comment-ender,
eb8c3be9 1190 any comment-starts earlier than that don't count
37bef230 1191 (because they go with the earlier comment-ender). */
e5d4f4dc
RS
1192 if (code == Sendcomment
1193 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
8489eb67 1194 break;
37bef230
RS
1195
1196 /* Assume a defun-start point is outside of strings. */
1197 if (code == Sopen
1198 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
1199 break;
8489eb67 1200 }
37bef230
RS
1201
1202 if (comstart_pos == 0)
1203 from = comment_end;
1204 /* If the earliest comment starter
1205 is followed by uniform paired string quotes or none,
1206 we know it can't be inside a string
1207 since if it were then the comment ender would be inside one.
1208 So it does start a comment. Skip back to it. */
1209 else if (comstart_parity == 0 && !string_lossage)
1210 from = comstart_pos;
1211 else
e5d4f4dc
RS
1212 {
1213 /* We had two kinds of string delimiters mixed up
1214 together. Decode this going forwards.
1215 Scan fwd from the previous comment ender
1216 to the one in question; this records where we
1217 last passed a comment starter. */
1218 struct lisp_parse_state state;
37bef230 1219 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 1220 comment_end - 1, -10000, 0, Qnil, 0);
e5d4f4dc
RS
1221 if (state.incomment)
1222 from = state.comstart;
1223 else
1224 /* We can't grok this as a comment; scan it normally. */
1225 from = comment_end;
1226 }
8489eb67
RS
1227 }
1228 break;
1229
1230 case Sstring:
1231 stringterm = FETCH_CHAR (from);
1232 while (1)
1233 {
1234 if (from == stop) goto lose;
1235 if (!char_quoted (from - 1)
1236 && stringterm == FETCH_CHAR (from - 1))
1237 break;
1238 from--;
1239 }
1240 from--;
1241 if (!depth && sexpflag) goto done2;
1242 break;
1243 }
1244 }
1245
1246 /* Reached start of buffer. Error if within object, return nil if between */
1247 if (depth) goto lose;
1248
1249 immediate_quit = 0;
1250 return Qnil;
1251
1252 done2:
1253 count++;
1254 }
1255
1256
1257 immediate_quit = 0;
1e142fb7 1258 XSETFASTINT (val, from);
8489eb67
RS
1259 return val;
1260
1261 lose:
1262 error ("Unbalanced parentheses");
1263 /* NOTREACHED */
1264}
1265
340f92b5 1266static int
8489eb67
RS
1267char_quoted (pos)
1268 register int pos;
1269{
1270 register enum syntaxcode code;
1271 register int beg = BEGV;
1272 register int quoted = 0;
1273
1274 while (pos > beg
1275 && ((code = SYNTAX (FETCH_CHAR (pos - 1))) == Scharquote
1276 || code == Sescape))
1277 pos--, quoted = !quoted;
1278 return quoted;
1279}
1280
1281DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1282 "Scan from character number FROM by COUNT lists.\n\
1283Returns the character number of the position thus found.\n\
1284\n\
1285If DEPTH is nonzero, paren depth begins counting from that value,\n\
1286only places where the depth in parentheses becomes zero\n\
1287are candidates for stopping; COUNT such places are counted.\n\
1288Thus, a positive value for DEPTH means go out levels.\n\
1289\n\
1290Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1291\n\
1292If the beginning or end of (the accessible part of) the buffer is reached\n\
1293and the depth is wrong, an error is signaled.\n\
1294If the depth is right but the count is not used up, nil is returned.")
1295 (from, count, depth)
1296 Lisp_Object from, count, depth;
1297{
1298 CHECK_NUMBER (from, 0);
1299 CHECK_NUMBER (count, 1);
1300 CHECK_NUMBER (depth, 2);
1301
1302 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1303}
1304
1305DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1306 "Scan from character number FROM by COUNT balanced expressions.\n\
1307If COUNT is negative, scan backwards.\n\
1308Returns the character number of the position thus found.\n\
1309\n\
1310Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1311\n\
1312If the beginning or end of (the accessible part of) the buffer is reached\n\
1313in the middle of a parenthetical grouping, an error is signaled.\n\
1314If the beginning or end is reached between groupings\n\
1315but before count is used up, nil is returned.")
1316 (from, count)
1317 Lisp_Object from, count;
1318{
1319 CHECK_NUMBER (from, 0);
1320 CHECK_NUMBER (count, 1);
1321
1322 return scan_lists (XINT (from), XINT (count), 0, 1);
1323}
1324
1325DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1326 0, 0, 0,
1327 "Move point backward over any number of chars with prefix syntax.\n\
1328This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1329 ()
1330{
1331 int beg = BEGV;
1332 int pos = point;
1333
1334 while (pos > beg && !char_quoted (pos - 1)
1335 && (SYNTAX (FETCH_CHAR (pos - 1)) == Squote
1336 || SYNTAX_PREFIX (FETCH_CHAR (pos - 1))))
1337 pos--;
1338
1339 SET_PT (pos);
1340
1341 return Qnil;
1342}
1343\f
8489eb67 1344/* Parse forward from FROM to END,
e5d4f4dc
RS
1345 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1346 and return a description of the state of the parse at END.
c81a3712
RS
1347 If STOPBEFORE is nonzero, stop at the start of an atom.
1348 If COMMENTSTOP is nonzero, stop at the start of a comment. */
8489eb67 1349
340f92b5 1350static void
c81a3712
RS
1351scan_sexps_forward (stateptr, from, end, targetdepth,
1352 stopbefore, oldstate, commentstop)
e5d4f4dc 1353 struct lisp_parse_state *stateptr;
8489eb67
RS
1354 register int from;
1355 int end, targetdepth, stopbefore;
1356 Lisp_Object oldstate;
c81a3712 1357 int commentstop;
8489eb67
RS
1358{
1359 struct lisp_parse_state state;
1360
1361 register enum syntaxcode code;
1362 struct level { int last, prev; };
1363 struct level levelstart[100];
1364 register struct level *curlevel = levelstart;
1365 struct level *endlevel = levelstart + 100;
1366 char prev;
1367 register int depth; /* Paren depth of current scanning location.
1368 level - levelstart equals this except
1369 when the depth becomes negative. */
1370 int mindepth; /* Lowest DEPTH value seen. */
1371 int start_quoted = 0; /* Nonzero means starting after a char quote */
1372 Lisp_Object tem;
1373
1374 immediate_quit = 1;
1375 QUIT;
1376
265a9e55 1377 if (NILP (oldstate))
8489eb67
RS
1378 {
1379 depth = 0;
1380 state.instring = -1;
1381 state.incomment = 0;
e5d4f4dc 1382 state.comstyle = 0; /* comment style a by default */
8489eb67
RS
1383 }
1384 else
1385 {
1386 tem = Fcar (oldstate);
265a9e55 1387 if (!NILP (tem))
8489eb67
RS
1388 depth = XINT (tem);
1389 else
1390 depth = 0;
1391
1392 oldstate = Fcdr (oldstate);
1393 oldstate = Fcdr (oldstate);
1394 oldstate = Fcdr (oldstate);
1395 tem = Fcar (oldstate);
265a9e55 1396 state.instring = !NILP (tem) ? XINT (tem) : -1;
8489eb67
RS
1397
1398 oldstate = Fcdr (oldstate);
1399 tem = Fcar (oldstate);
265a9e55 1400 state.incomment = !NILP (tem);
8489eb67
RS
1401
1402 oldstate = Fcdr (oldstate);
1403 tem = Fcar (oldstate);
265a9e55 1404 start_quoted = !NILP (tem);
e5d4f4dc
RS
1405
1406 /* if the eight element of the list is nil, we are in comment
1407 style a. if it is non-nil, we are in comment style b */
1408 oldstate = Fcdr (oldstate);
1409 oldstate = Fcdr (oldstate);
e5d4f4dc
RS
1410 tem = Fcar (oldstate);
1411 state.comstyle = !NILP (tem);
8489eb67
RS
1412 }
1413 state.quoted = 0;
1414 mindepth = depth;
1415
1416 curlevel->prev = -1;
1417 curlevel->last = -1;
1418
1419 /* Enter the loop at a place appropriate for initial state. */
1420
1421 if (state.incomment) goto startincomment;
1422 if (state.instring >= 0)
1423 {
1424 if (start_quoted) goto startquotedinstring;
1425 goto startinstring;
1426 }
1427 if (start_quoted) goto startquoted;
1428
1429 while (from < end)
1430 {
e5d4f4dc 1431 code = SYNTAX (FETCH_CHAR (from));
8489eb67 1432 from++;
8f9dc2ed
RS
1433 if (code == Scomment)
1434 state.comstart = from-1;
1435
1436 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
1437 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
e5d4f4dc
RS
1438 {
1439 /* Record the comment style we have entered so that only
1440 the comment-end sequence of the same style actually
1441 terminates the comment section. */
1442 code = Scomment;
1443 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
8f9dc2ed 1444 state.comstart = from-1;
e5d4f4dc
RS
1445 from++;
1446 }
1447
8489eb67
RS
1448 if (SYNTAX_PREFIX (FETCH_CHAR (from - 1)))
1449 continue;
0220c518 1450 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1451 {
1452 case Sescape:
1453 case Scharquote:
1454 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1455 curlevel->last = from - 1;
1456 startquoted:
1457 if (from == end) goto endquoted;
1458 from++;
1459 goto symstarted;
1460 /* treat following character as a word constituent */
1461 case Sword:
1462 case Ssymbol:
1463 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1464 curlevel->last = from - 1;
1465 symstarted:
1466 while (from < end)
1467 {
0220c518 1468 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1469 {
1470 case Scharquote:
1471 case Sescape:
1472 from++;
1473 if (from == end) goto endquoted;
1474 break;
1475 case Sword:
1476 case Ssymbol:
1477 case Squote:
1478 break;
1479 default:
1480 goto symdone;
1481 }
1482 from++;
1483 }
1484 symdone:
1485 curlevel->prev = curlevel->last;
1486 break;
1487
5a28e48c
RS
1488 startincomment:
1489 if (commentstop)
1490 goto done;
1491 if (from != BEGV)
1492 {
1493 /* Enter the loop in the middle so that we find
1494 a 2-char comment ender if we start in the middle of it. */
1495 prev = FETCH_CHAR (from - 1);
1496 goto startincomment_1;
1497 }
1498 /* At beginning of buffer, enter the loop the ordinary way. */
1499
8489eb67
RS
1500 case Scomment:
1501 state.incomment = 1;
c81a3712
RS
1502 if (commentstop)
1503 goto done;
8489eb67
RS
1504 while (1)
1505 {
1506 if (from == end) goto done;
e5d4f4dc
RS
1507 prev = FETCH_CHAR (from);
1508 if (SYNTAX (prev) == Sendcomment
1509 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1510 /* Only terminate the comment section if the endcomment
1511 of the same style as the start sequence has been
1512 encountered. */
8489eb67
RS
1513 break;
1514 from++;
5a28e48c 1515 startincomment_1:
8489eb67 1516 if (from < end && SYNTAX_COMEND_FIRST (prev)
e5d4f4dc
RS
1517 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1518 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1519 /* Only terminate the comment section if the end-comment
1520 sequence of the same style as the start sequence has
1521 been encountered. */
8489eb67
RS
1522 { from++; break; }
1523 }
1524 state.incomment = 0;
e5d4f4dc 1525 state.comstyle = 0; /* reset the comment style */
8489eb67
RS
1526 break;
1527
1528 case Sopen:
1529 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1530 depth++;
1531 /* curlevel++->last ran into compiler bug on Apollo */
1532 curlevel->last = from - 1;
1533 if (++curlevel == endlevel)
1534 error ("Nesting too deep for parser");
1535 curlevel->prev = -1;
1536 curlevel->last = -1;
30844415 1537 if (targetdepth == depth) goto done;
8489eb67
RS
1538 break;
1539
1540 case Sclose:
1541 depth--;
1542 if (depth < mindepth)
1543 mindepth = depth;
1544 if (curlevel != levelstart)
1545 curlevel--;
1546 curlevel->prev = curlevel->last;
30844415 1547 if (targetdepth == depth) goto done;
8489eb67
RS
1548 break;
1549
1550 case Sstring:
1551 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1552 curlevel->last = from - 1;
1553 state.instring = FETCH_CHAR (from - 1);
1554 startinstring:
1555 while (1)
1556 {
1557 if (from >= end) goto done;
1558 if (FETCH_CHAR (from) == state.instring) break;
0220c518 1559 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1560 {
1561 case Scharquote:
1562 case Sescape:
1563 from++;
1564 startquotedinstring:
1565 if (from >= end) goto endquoted;
1566 }
1567 from++;
1568 }
1569 state.instring = -1;
1570 curlevel->prev = curlevel->last;
1571 from++;
1572 break;
1573
1574 case Smath:
1575 break;
1576 }
1577 }
1578 goto done;
1579
1580 stop: /* Here if stopping before start of sexp. */
1581 from--; /* We have just fetched the char that starts it; */
1582 goto done; /* but return the position before it. */
1583
1584 endquoted:
1585 state.quoted = 1;
1586 done:
1587 state.depth = depth;
1588 state.mindepth = mindepth;
1589 state.thislevelstart = curlevel->prev;
1590 state.prevlevelstart
1591 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1592 state.location = from;
1593 immediate_quit = 0;
1594
e5d4f4dc 1595 *stateptr = state;
8489eb67
RS
1596}
1597
1598/* This comment supplies the doc string for parse-partial-sexp,
1599 for make-docfile to see. We cannot put this in the real DEFUN
1600 due to limits in the Unix cpp.
1601
c81a3712 1602DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
1603 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1604Parsing stops at TO or when certain criteria are met;\n\
1605 point is set to where parsing stops.\n\
1606If fifth arg STATE is omitted or nil,\n\
1607 parsing assumes that FROM is the beginning of a function.\n\
e5d4f4dc 1608Value is a list of eight elements describing final state of parsing:\n\
af50f9e5
RS
1609 0. depth in parens.\n\
1610 1. character address of start of innermost containing list; nil if none.\n\
1611 2. character address of start of last complete sexp terminated.\n\
1612 3. non-nil if inside a string.\n\
8489eb67 1613 (it is the character that will terminate the string.)\n\
af50f9e5
RS
1614 4. t if inside a comment.\n\
1615 5. t if following a quote character.\n\
1616 6. the minimum paren-depth encountered during this scan.\n\
1617 7. t if in a comment of style `b'.\n\
8489eb67
RS
1618If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1619in parentheses becomes equal to TARGETDEPTH.\n\
1620Fourth arg STOPBEFORE non-nil means stop when come to\n\
1621 any character that starts a sexp.\n\
ec11639d 1622Fifth arg STATE is an eight-list like what this function returns.\n\
a4275ad1 1623It is used to initialize the state of the parse. Its second and third
c81a3712
RS
1624elements are ignored.
1625Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1626 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
1627*/
1628
c81a3712 1629DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 1630 0 /* See immediately above */)
c81a3712
RS
1631 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1632 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
1633{
1634 struct lisp_parse_state state;
1635 int target;
1636
265a9e55 1637 if (!NILP (targetdepth))
8489eb67
RS
1638 {
1639 CHECK_NUMBER (targetdepth, 3);
1640 target = XINT (targetdepth);
1641 }
1642 else
1643 target = -100000; /* We won't reach this depth */
1644
1645 validate_region (&from, &to);
e5d4f4dc 1646 scan_sexps_forward (&state, XINT (from), XINT (to),
c81a3712
RS
1647 target, !NILP (stopbefore), oldstate,
1648 !NILP (commentstop));
8489eb67
RS
1649
1650 SET_PT (state.location);
1651
1652 return Fcons (make_number (state.depth),
1653 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1654 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1655 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1656 Fcons (state.incomment ? Qt : Qnil,
1657 Fcons (state.quoted ? Qt : Qnil,
e5d4f4dc
RS
1658 Fcons (make_number (state.mindepth),
1659 Fcons (state.comstyle ? Qt : Qnil,
1660 Qnil))))))));
8489eb67
RS
1661}
1662\f
1663init_syntax_once ()
1664{
1665 register int i;
1666 register struct Lisp_Vector *v;
1667
1668 /* Set this now, so first buffer creation can refer to it. */
1669 /* Make it nil before calling copy-syntax-table
1670 so that copy-syntax-table will know not to try to copy from garbage */
1671 Vstandard_syntax_table = Qnil;
1672 Vstandard_syntax_table = Fcopy_syntax_table (Qnil);
1673
1674 v = XVECTOR (Vstandard_syntax_table);
1675
1676 for (i = 'a'; i <= 'z'; i++)
1e142fb7 1677 XSETFASTINT (v->contents[i], (int) Sword);
8489eb67 1678 for (i = 'A'; i <= 'Z'; i++)
1e142fb7 1679 XSETFASTINT (v->contents[i], (int) Sword);
8489eb67 1680 for (i = '0'; i <= '9'; i++)
1e142fb7
KH
1681 XSETFASTINT (v->contents[i], (int) Sword);
1682 XSETFASTINT (v->contents['$'], (int) Sword);
1683 XSETFASTINT (v->contents['%'], (int) Sword);
1684
1685 XSETFASTINT (v->contents['('], (int) Sopen + (')' << 8));
1686 XSETFASTINT (v->contents[')'], (int) Sclose + ('(' << 8));
1687 XSETFASTINT (v->contents['['], (int) Sopen + (']' << 8));
1688 XSETFASTINT (v->contents[']'], (int) Sclose + ('[' << 8));
1689 XSETFASTINT (v->contents['{'], (int) Sopen + ('}' << 8));
1690 XSETFASTINT (v->contents['}'], (int) Sclose + ('{' << 8));
1691 XSETFASTINT (v->contents['"'], (int) Sstring);
1692 XSETFASTINT (v->contents['\\'], (int) Sescape);
8489eb67
RS
1693
1694 for (i = 0; i < 10; i++)
1e142fb7 1695 XSETFASTINT (v->contents["_-+*/&|<>="[i]], (int) Ssymbol);
8489eb67
RS
1696
1697 for (i = 0; i < 12; i++)
1e142fb7 1698 XSETFASTINT (v->contents[".,;:?!#@~^'`"[i]], (int) Spunct);
8489eb67
RS
1699}
1700
1701syms_of_syntax ()
1702{
1703 Qsyntax_table_p = intern ("syntax-table-p");
1704 staticpro (&Qsyntax_table_p);
1705
1706 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1707 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1708
1709 words_include_escapes = 0;
1710 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1711 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1712
1713 defsubr (&Ssyntax_table_p);
1714 defsubr (&Ssyntax_table);
1715 defsubr (&Sstandard_syntax_table);
1716 defsubr (&Scopy_syntax_table);
1717 defsubr (&Sset_syntax_table);
1718 defsubr (&Schar_syntax);
beefa22e 1719 defsubr (&Smatching_paren);
8489eb67
RS
1720 defsubr (&Smodify_syntax_entry);
1721 defsubr (&Sdescribe_syntax);
1722
1723 defsubr (&Sforward_word);
1724
b3cfe0c8 1725 defsubr (&Sforward_comment);
8489eb67
RS
1726 defsubr (&Sscan_lists);
1727 defsubr (&Sscan_sexps);
1728 defsubr (&Sbackward_prefix_chars);
1729 defsubr (&Sparse_partial_sexp);
1730}