Fix bugs with inappropriate mixing of Lisp_Object with int.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
b745025d 2 Copyright (C) 1985, 87, 93, 94, 95, 1997 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67
RS
23#include <ctype.h>
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
93da5fff 27#include "charset.h"
195d1361
RS
28
29/* Make syntax table lookup grant data in gl_state. */
30#define SYNTAX_ENTRY_VIA_PROPERTY
31
8489eb67 32#include "syntax.h"
195d1361
RS
33#include "intervals.h"
34
35/* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39#define ST_COMMENT_STYLE (256 + 1)
40#define ST_STRING_STYLE (256 + 2)
93da5fff 41#include "category.h"
8489eb67 42
7bf5e9e4 43Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67 44
340f92b5
RS
45static void scan_sexps_forward ();
46static int char_quoted ();
47
8489eb67 48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
8ea151b2
RS
51/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54Lisp_Object syntax_temp;
55
e5d4f4dc
RS
56/* This is the internal form of the parse state used in parse-partial-sexp. */
57
58struct lisp_parse_state
59 {
195d1361
RS
60 int depth; /* Depth at end of parsing. */
61 int instring; /* -1 if not within string, else desired terminator. */
62 int incomment; /* Nonzero if within a comment at end of parsing. */
63 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
64 int quoted; /* Nonzero if just after an escape char at end of parsing */
65 int thislevelstart; /* Char number of most recent start-of-expression at current level */
66 int prevlevelstart; /* Char number of start of containing expression */
195d1361 67 int location; /* Char number at which parsing stopped. */
e5d4f4dc 68 int mindepth; /* Minimum depth seen while scanning. */
195d1361 69 int comstr_start; /* Position just after last comment/string starter. */
e5d4f4dc
RS
70 };
71\f
37bef230
RS
72/* These variables are a cache for finding the start of a defun.
73 find_start_pos is the place for which the defun start was found.
74 find_start_value is the defun start position found for it.
75 find_start_buffer is the buffer it was found in.
76 find_start_begv is the BEGV value when it was found.
77 find_start_modiff is the value of MODIFF when it was found. */
78
79static int find_start_pos;
80static int find_start_value;
81static struct buffer *find_start_buffer;
82static int find_start_begv;
83static int find_start_modiff;
195d1361
RS
84\f
85
86struct gl_state_s gl_state; /* Global state of syntax parser. */
87
88INTERVAL interval_of ();
89#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
90 to scan to property-change. */
91
92/*
93 Update gl_state to an appropriate interval which contains POS. The
94 sign of COUNT give the relative position of POS wrt the previously
95 valid interval. If INIT, only [be]_property fields of gl_state are
96 valid at start, the rest is filled basing on OBJECT.
97
98 `gl_state.*_i' are the intervals, and pos is further in the search
99 direction than the intervals - or in an interval. We update the
100 current syntax-table basing on the property of this interval, and
101 update the interval to start further than POS - or be
102 NULL_INTERVAL. We also update lim_property to be the next value of
103 pos to call this subroutine again - or be before/after the
104 start/end of OBJECT. */
105
106void
107update_syntax_table (pos, count, init, object)
108 int pos, count, init;
109 Lisp_Object object;
110{
111 Lisp_Object tmp_table;
112 int cnt = 0, doing_extra = 0, invalidate = 1;
113 INTERVAL i, oldi;
114
115 if (init)
116 {
117 gl_state.start = gl_state.b_property;
118 gl_state.stop = gl_state.e_property;
119 gl_state.forward_i = interval_of (pos, object);
120 i = gl_state.backward_i = gl_state.forward_i;
121 gl_state.left_ok = gl_state.right_ok = 1;
122 invalidate = 0;
123 if (NULL_INTERVAL_P (i))
124 return;
125 gl_state.b_property = i->position - 1;
126 gl_state.e_property = INTERVAL_LAST_POS (i);
127 goto update;
128 }
129 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
130
131 /* We are guarantied to be called with pos either in i, of further off. */
132 if (NULL_INTERVAL_P (i))
133 error ("Error in syntax_table logic for to-the-end intervals");
134 else if (pos < i->position) /* Move left. */
135 {
136 if (count > 0)
137 error ("Error in syntax_table logic for intervals <-.");
138 /* Update the interval. */
139 i = update_interval (i, pos);
140 if (oldi->position != INTERVAL_LAST_POS (i))
141 {
142 invalidate = 0;
143 gl_state.right_ok = 1; /* Invalidate the other end. */
144 gl_state.forward_i = i;
145 gl_state.e_property = INTERVAL_LAST_POS (i);
146 }
147 }
148 else if (pos >= INTERVAL_LAST_POS (i)) /* Move right. */
149 {
150 if (count < 0)
151 error ("Error in syntax_table logic for intervals ->.");
152 /* Update the interval. */
153 i = update_interval (i, pos);
154 if (i->position != INTERVAL_LAST_POS (oldi))
155 {
156 invalidate = 0;
157 gl_state.left_ok = 1; /* Invalidate the other end. */
158 gl_state.backward_i = i;
159 gl_state.b_property = i->position - 1;
160 }
161 }
162 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
163 {
164 /* We do not need to recalculate tmp_table. */
165 tmp_table = gl_state.old_prop;
166 }
167
168 update:
169 tmp_table = textget (i->plist, Qsyntax_table);
170
171 if (invalidate)
172 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
173
174 if (invalidate) /* Did not get to adjacent interval. */
175 { /* with the same table => */
176 /* invalidate the old range. */
177 if (count > 0)
178 {
179 gl_state.backward_i = i;
180 gl_state.left_ok = 1; /* Invalidate the other end. */
181 gl_state.b_property = i->position - 1;
182 }
183 else
184 {
185 gl_state.forward_i = i;
186 gl_state.right_ok = 1; /* Invalidate the other end. */
187 gl_state.e_property = INTERVAL_LAST_POS (i);
188 }
189 }
37bef230 190
195d1361
RS
191 gl_state.current_syntax_table = tmp_table;
192 gl_state.old_prop = tmp_table;
193 if (Fsyntax_table_p (tmp_table) == Qt)
194 {
195 gl_state.use_global = 0;
196 }
197 else if (CONSP (tmp_table))
198 {
199 gl_state.use_global = 1;
200 gl_state.global_code = tmp_table;
201 }
202 else
203 {
204 gl_state.use_global = 0;
205 gl_state.current_syntax_table = current_buffer->syntax_table;
206 }
207
208 while (!NULL_INTERVAL_P (i))
209 {
210 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
211 {
212 if (count > 0)
213 gl_state.right_ok = 0;
214 else
215 gl_state.left_ok = 0;
216 break;
217 }
218 else if (cnt == INTERVALS_AT_ONCE)
219 {
220 if (count > 0)
221 gl_state.right_ok = 1;
222 else
223 gl_state.left_ok = 1;
224 break;
225 }
226 cnt++;
227 i = count > 0 ? next_interval (i) : previous_interval (i);
228 }
229 if (NULL_INTERVAL_P (i))
230 { /* This property goes to the end. */
231 if (count > 0)
232 gl_state.e_property = gl_state.stop;
233 else
234 gl_state.b_property = gl_state.start;
235 }
236 else
237 {
238 if (count > 0)
239 {
240 gl_state.e_property = i->position;
241 gl_state.forward_i = i;
242 }
243 else
244 {
245 gl_state.b_property = i->position + LENGTH (i) - 1;
246 gl_state.backward_i = i;
247 }
248 }
249}
250\f
251/* Returns TRUE if char at POS is quoted.
252 Global syntax-table data should be set up already to be good at pos
253 or after. On return global syntax data is good for lookup at POS. */
254
255static int
256char_quoted (pos)
257 register int pos;
258{
259 register enum syntaxcode code;
260 register int beg = BEGV;
261 register int quoted = 0;
262 int temp_pos = pos;
263
264 DEC_POS (temp_pos);
265 while (temp_pos >= beg
266 && ( UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos), 1)
267 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
268 || code == Sescape))
269 {
270 temp_pos--, quoted = !quoted;
271 }
272 UPDATE_SYNTAX_TABLE (pos);
273 return quoted;
274}
275\f
37bef230
RS
276/* Find a defun-start that is the last one before POS (or nearly the last).
277 We record what we find, so that another call in the same area
195d1361
RS
278 can return the same value right away.
279
280 There is no promise at which position the global syntax data is
281 valid on return from the subroutine, so the caller should explicitly
282 update the global data. */
37bef230
RS
283
284static int
285find_defun_start (pos)
286 int pos;
287{
288 int tem;
289 int shortage;
290
291 /* Use previous finding, if it's valid and applies to this inquiry. */
292 if (current_buffer == find_start_buffer
293 /* Reuse the defun-start even if POS is a little farther on.
294 POS might be in the next defun, but that's ok.
295 Our value may not be the best possible, but will still be usable. */
296 && pos <= find_start_pos + 1000
297 && pos >= find_start_value
298 && BEGV == find_start_begv
299 && MODIFF == find_start_modiff)
300 return find_start_value;
301
302 /* Back up to start of line. */
be5357e2 303 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
37bef230 304
195d1361
RS
305 /* We optimize syntax-table lookup for rare updates. Thus we accept
306 only those `^\s(' which are good in global _and_ text-property
307 syntax-tables. */
308 gl_state.current_syntax_table = current_buffer->syntax_table;
309 gl_state.use_global = 0;
37bef230
RS
310 while (tem > BEGV)
311 {
312 /* Open-paren at start of line means we found our defun-start. */
313 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
195d1361
RS
314 {
315 SETUP_SYNTAX_TABLE (tem + 1, -1); /* Try again... */
316 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
317 break;
318 /* Now fallback to the default value. */
319 gl_state.current_syntax_table = current_buffer->syntax_table;
320 gl_state.use_global = 0;
321 }
37bef230 322 /* Move to beg of previous line. */
be5357e2 323 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
37bef230
RS
324 }
325
326 /* Record what we found, for the next try. */
327 find_start_value = tem;
328 find_start_buffer = current_buffer;
329 find_start_modiff = MODIFF;
330 find_start_begv = BEGV;
331 find_start_pos = pos;
332
333 return find_start_value;
334}
335\f
195d1361
RS
336/* Checks whether FROM is the end of comment. Does not try to
337 fallback more than to STOP.
338 Returns -1 if cannot find comment ending at from, otherwise start
339 of comment. Global syntax data remains valid for
340 backward search starting at the returned value (or at FROM, if
341 the search was not successful). */
342
343static int
ea315ed6
RS
344back_comment (from, stop, comstyle)
345 int from, stop, comstyle;
195d1361
RS
346{
347 /* Look back, counting the parity of string-quotes,
348 and recording the comment-starters seen.
349 When we reach a safe place, assume that's not in a string;
350 then step the main scan to the earliest comment-starter seen
351 an even number of string quotes away from the safe place.
352
353 OFROM[I] is position of the earliest comment-starter seen
354 which is I+2X quotes from the comment-end.
355 PARITY is current parity of quotes from the comment end. */
356 int parity = 0;
357 int my_stringend = 0;
358 int string_lossage = 0;
359 int comment_end = from;
360 int comstart_pos = 0;
361 int comstart_parity = 0;
362 int scanstart = from - 1;
363 register enum syntaxcode code;
ea315ed6 364 int c;
195d1361
RS
365
366 /* At beginning of range to scan, we're outside of strings;
367 that determines quote parity to the comment-end. */
368 while (from != stop)
369 {
370 /* Move back and examine a character. */
371 DEC_POS (from);
372 UPDATE_SYNTAX_TABLE_BACKWARD (from);
373
374 c = FETCH_CHAR (from);
375 code = SYNTAX (c);
376
377 /* If this char is the second of a 2-char comment sequence,
378 back up and give the pair the appropriate syntax. */
379 if (from > stop && SYNTAX_COMEND_SECOND (c)
380 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
381 {
382 code = Sendcomment;
383 DEC_POS (from);
384 /* This is apparently the best we can do: */
385 UPDATE_SYNTAX_TABLE_BACKWARD (from);
386 c = FETCH_CHAR (from);
387 }
388
389 /* If this char starts a 2-char comment start sequence,
390 treat it like a 1-char comment starter. */
ea315ed6
RS
391 if (from < scanstart && SYNTAX_COMSTART_SECOND (c)
392 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from - 1))
393 && comstyle == SYNTAX_COMMENT_STYLE (c))
394 {
395 code = Scomment;
396 DEC_POS (from);
397 /* This is apparently the best we can do: */
398 UPDATE_SYNTAX_TABLE_BACKWARD (from);
399 c = FETCH_CHAR (from);
400 }
195d1361
RS
401
402 /* Ignore escaped characters. */
403 if (char_quoted (from))
404 continue;
405
406 /* Track parity of quotes. */
407 if (code == Sstring)
408 {
409 parity ^= 1;
410 if (my_stringend == 0)
411 my_stringend = c;
412 /* If we have two kinds of string delimiters.
413 There's no way to grok this scanning backwards. */
414 else if (my_stringend != c)
415 string_lossage = 1;
416 }
417
418 if (code == Sstring_fence || code == Scomment_fence)
419 {
420 parity ^= 1;
421 if (my_stringend == 0)
422 my_stringend =
423 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
424 /* If we have two kinds of string delimiters.
425 There's no way to grok this scanning backwards. */
426 else if (my_stringend != (code == Sstring_fence
427 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
428 string_lossage = 1;
429 }
430
431 /* Record comment-starters according to that
432 quote-parity to the comment-end. */
433 if (code == Scomment)
434 {
435 comstart_parity = parity;
436 comstart_pos = from;
437 }
438
439 /* If we find another earlier comment-ender,
440 any comment-starts earlier than that don't count
441 (because they go with the earlier comment-ender). */
442 if (code == Sendcomment
443 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
444 break;
445
446 /* Assume a defun-start point is outside of strings. */
447 if (code == Sopen
448 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
449 break;
450 }
451
452 if (comstart_pos == 0)
453 {
454 from = comment_end;
455 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
456 }
457 /* If the earliest comment starter
458 is followed by uniform paired string quotes or none,
459 we know it can't be inside a string
460 since if it were then the comment ender would be inside one.
461 So it does start a comment. Skip back to it. */
462 else if (comstart_parity == 0 && !string_lossage)
463 {
464 from = comstart_pos;
465 /* Globals are correct now. */
466 }
467 else
468 {
469 /* We had two kinds of string delimiters mixed up
470 together. Decode this going forwards.
471 Scan fwd from the previous comment ender
472 to the one in question; this records where we
473 last passed a comment starter. */
474 struct lisp_parse_state state;
475 scan_sexps_forward (&state, find_defun_start (comment_end),
476 comment_end - 1, -10000, 0, Qnil, 0);
477 if (state.incomment)
478 {
479 /* scan_sexps_forward changed the direction of search in
480 global variables, so we need to update it completely. */
481
482 from = state.comstr_start;
483 }
484 else
485 {
486 from = comment_end;
487 }
488 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
489 }
490
491 return from;
492}
493\f
8489eb67 494DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
2203e1e8 495 "Return t if OBJECT is a syntax table.\n\
8ea151b2 496Currently, any char-table counts as a syntax table.")
2203e1e8
EN
497 (object)
498 Lisp_Object object;
8489eb67 499{
2203e1e8 500 if (CHAR_TABLE_P (object)
e704cb4b 501 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
502 return Qt;
503 return Qnil;
504}
505
8ea151b2 506static void
8489eb67
RS
507check_syntax_table (obj)
508 Lisp_Object obj;
509{
d1be9f0f 510 if (!(CHAR_TABLE_P (obj)
e704cb4b 511 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 512 wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
513}
514
8489eb67
RS
515DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
516 "Return the current syntax table.\n\
517This is the one specified by the current buffer.")
518 ()
519{
520 return current_buffer->syntax_table;
521}
522
523DEFUN ("standard-syntax-table", Fstandard_syntax_table,
524 Sstandard_syntax_table, 0, 0, 0,
525 "Return the standard syntax table.\n\
526This is the one used for new buffers.")
527 ()
528{
529 return Vstandard_syntax_table;
530}
531
532DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
533 "Construct a new syntax table and return it.\n\
534It is a copy of the TABLE, which defaults to the standard syntax table.")
535 (table)
536 Lisp_Object table;
537{
8ea151b2
RS
538 Lisp_Object copy;
539
265a9e55 540 if (!NILP (table))
8ea151b2
RS
541 check_syntax_table (table);
542 else
543 table = Vstandard_syntax_table;
544
545 copy = Fcopy_sequence (table);
0f867324
RS
546
547 /* Only the standard syntax table should have a default element.
548 Other syntax tables should inherit from parents instead. */
549 XCHAR_TABLE (copy)->defalt = Qnil;
550
551 /* Copied syntax tables should all have parents.
552 If we copied one with no parent, such as the standard syntax table,
553 use the standard syntax table as the copy's parent. */
554 if (NILP (XCHAR_TABLE (copy)->parent))
555 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 556 return copy;
8489eb67
RS
557}
558
559DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
560 "Select a new syntax table for the current buffer.\n\
561One argument, a syntax table.")
562 (table)
563 Lisp_Object table;
564{
8ea151b2 565 check_syntax_table (table);
8489eb67
RS
566 current_buffer->syntax_table = table;
567 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
568 current_buffer->local_var_flags
569 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
570 return table;
571}
572\f
573/* Convert a letter which signifies a syntax code
574 into the code it signifies.
195d1361 575 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
576
577unsigned char syntax_spec_code[0400] =
578 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
579 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
580 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
581 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 582 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
583 (char) Smath, 0377, 0377, (char) Squote,
584 (char) Sopen, (char) Sclose, 0377, 0377,
585 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
586 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
587 0377, 0377, 0377, 0377,
588 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 589 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
590 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
591 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
592 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
593 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
594 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
595 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 596 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
597 };
598
195d1361 599/* Indexed by syntax code, give the letter that describes it. */
8489eb67 600
195d1361 601char syntax_code_spec[16] =
8489eb67 602 {
195d1361
RS
603 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
604 '!', '|'
8489eb67 605 };
93da5fff
KH
606
607/* Indexed by syntax code, give the object (cons of syntax code and
608 nil) to be stored in syntax table. Since these objects can be
609 shared among syntax tables, we generate them in advance. By
610 sharing objects, the function `describe-syntax' can give a more
611 compact listing. */
612static Lisp_Object Vsyntax_code_object;
613
8489eb67 614\f
8ea151b2
RS
615/* Look up the value for CHARACTER in syntax table TABLE's parent
616 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
617 for CHARACTER. It's actually used only when not compiled with GCC. */
618
619Lisp_Object
620syntax_parent_lookup (table, character)
621 Lisp_Object table;
622 int character;
623{
624 Lisp_Object value;
625
626 while (1)
627 {
628 table = XCHAR_TABLE (table)->parent;
629 if (NILP (table))
630 return Qnil;
631
632 value = XCHAR_TABLE (table)->contents[character];
633 if (!NILP (value))
634 return value;
635 }
636}
637
8489eb67 638DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
2203e1e8
EN
639 "Return the syntax code of CHARACTER, described by a character.\n\
640For example, if CHARACTER is a word constituent,\n\
641the character `w' is returned.\n\
8489eb67
RS
642The characters that correspond to various syntax codes\n\
643are listed in the documentation of `modify-syntax-entry'.")
2203e1e8
EN
644 (character)
645 Lisp_Object character;
8489eb67 646{
8ea151b2 647 int char_int;
195d1361
RS
648 gl_state.current_syntax_table = current_buffer->syntax_table;
649
650 gl_state.use_global = 0;
2203e1e8
EN
651 CHECK_NUMBER (character, 0);
652 char_int = XINT (character);
8ea151b2 653 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
654}
655
656DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
2203e1e8
EN
657 "Return the matching parenthesis of CHARACTER, or nil if none.")
658 (character)
659 Lisp_Object character;
beefa22e 660{
8ea151b2 661 int char_int, code;
195d1361
RS
662 gl_state.current_syntax_table = current_buffer->syntax_table;
663 gl_state.use_global = 0;
2203e1e8
EN
664 CHECK_NUMBER (character, 0);
665 char_int = XINT (character);
8ea151b2 666 code = SYNTAX (char_int);
a8bd7cd8 667 if (code == Sopen || code == Sclose)
8ea151b2 668 return make_number (SYNTAX_MATCH (char_int));
beefa22e 669 return Qnil;
8489eb67
RS
670}
671
672/* This comment supplies the doc string for modify-syntax-entry,
673 for make-docfile to see. We cannot put this in the real DEFUN
674 due to limits in the Unix cpp.
675
2fcea188 676DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
677 "Set syntax for character CHAR according to string S.\n\
678The syntax is changed only for table TABLE, which defaults to\n\
679 the current buffer's syntax table.\n\
680The first character of S should be one of the following:\n\
32676c08
JB
681 Space or - whitespace syntax. w word constituent.\n\
682 _ symbol constituent. . punctuation.\n\
683 ( open-parenthesis. ) close-parenthesis.\n\
684 \" string quote. \\ escape.\n\
685 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 686 < comment starter. > comment ender.\n\
6cb71bf6 687 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 688\n\
8489eb67
RS
689Only single-character comment start and end sequences are represented thus.\n\
690Two-character sequences are represented as described below.\n\
691The second character of S is the matching parenthesis,\n\
692 used only if the first character is `(' or `)'.\n\
693Any additional characters are flags.\n\
e5d4f4dc 694Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
2203e1e8
EN
695 1 means CHAR is the start of a two-char comment start sequence.\n\
696 2 means CHAR is the second character of such a sequence.\n\
697 3 means CHAR is the start of a two-char comment end sequence.\n\
698 4 means CHAR is the second character of such a sequence.\n\
e5d4f4dc 699\n\
195d1361 700There can be up to two orthogonal comment sequences. This is to support\n\
e5d4f4dc 701language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
702a, but you can set the comment sequence style to b (on the second character\n\
703of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc 704this flag:\n\
2203e1e8 705 b means CHAR is part of comment sequence b.\n\
e5d4f4dc 706\n\
2203e1e8 707 p means CHAR is a prefix character for `backward-prefix-chars';\n\
e5d4f4dc 708 such characters are treated as whitespace when they occur\n\
8489eb67 709 between expressions.")
2fcea188 710 (char, s, table)
8489eb67
RS
711*/
712
713DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
714 /* I really don't know why this is interactive
715 help-form should at least be made useful whilst reading the second arg
716 */
717 "cSet syntax for character: \nsSet syntax for %s to: ",
718 0 /* See immediately above */)
719 (c, newentry, syntax_table)
720 Lisp_Object c, newentry, syntax_table;
721{
8ea151b2 722 register unsigned char *p;
8489eb67 723 register enum syntaxcode code;
247e20a8 724 int val;
8ea151b2 725 Lisp_Object match;
8489eb67
RS
726
727 CHECK_NUMBER (c, 0);
728 CHECK_STRING (newentry, 1);
8ea151b2 729
265a9e55 730 if (NILP (syntax_table))
8489eb67
RS
731 syntax_table = current_buffer->syntax_table;
732 else
8ea151b2 733 check_syntax_table (syntax_table);
8489eb67
RS
734
735 p = XSTRING (newentry)->data;
736 code = (enum syntaxcode) syntax_spec_code[*p++];
737 if (((int) code & 0377) == 0377)
e704cb4b 738 error ("invalid syntax description letter: %c", XINT (c));
8489eb67 739
8ea151b2
RS
740 if (code == Sinherit)
741 {
e704cb4b 742 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
8ea151b2
RS
743 return Qnil;
744 }
745
746 if (*p)
d1be9f0f 747 {
93da5fff
KH
748 int len;
749 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
750 len);
751 XSETINT (match, character);
d1be9f0f
RS
752 if (XFASTINT (match) == ' ')
753 match = Qnil;
93da5fff 754 p += len;
d1be9f0f
RS
755 }
756 else
8ea151b2 757 match = Qnil;
8489eb67 758
8ea151b2 759 val = (int) code;
8489eb67
RS
760 while (*p)
761 switch (*p++)
762 {
763 case '1':
247e20a8 764 val |= 1 << 16;
8489eb67
RS
765 break;
766
767 case '2':
247e20a8 768 val |= 1 << 17;
8489eb67
RS
769 break;
770
771 case '3':
247e20a8 772 val |= 1 << 18;
8489eb67
RS
773 break;
774
775 case '4':
247e20a8 776 val |= 1 << 19;
8489eb67
RS
777 break;
778
779 case 'p':
247e20a8 780 val |= 1 << 20;
8489eb67 781 break;
e5d4f4dc
RS
782
783 case 'b':
247e20a8 784 val |= 1 << 21;
e5d4f4dc 785 break;
8489eb67
RS
786 }
787
93da5fff
KH
788 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
789 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
790 else
791 /* Since we can't use a shared object, let's make a new one. */
792 newentry = Fcons (make_number (val), match);
793
e704cb4b 794 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
8489eb67
RS
795
796 return Qnil;
797}
798\f
799/* Dump syntax table to buffer in human-readable format */
800
340f92b5 801static void
8489eb67
RS
802describe_syntax (value)
803 Lisp_Object value;
804{
805 register enum syntaxcode code;
e5d4f4dc 806 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67 807 char str[2];
8ea151b2 808 Lisp_Object first, match_lisp;
8489eb67
RS
809
810 Findent_to (make_number (16), make_number (1));
811
8ea151b2
RS
812 if (NILP (value))
813 {
93da5fff 814 insert_string ("default\n");
8ea151b2
RS
815 return;
816 }
817
908b7fea
KH
818 if (CHAR_TABLE_P (value))
819 {
820 insert_string ("deeper char-table ...\n");
821 return;
822 }
823
8ea151b2
RS
824 if (!CONSP (value))
825 {
93da5fff 826 insert_string ("invalid\n");
8ea151b2
RS
827 return;
828 }
829
830 first = XCONS (value)->car;
831 match_lisp = XCONS (value)->cdr;
832
833 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 834 {
93da5fff 835 insert_string ("invalid\n");
8489eb67
RS
836 return;
837 }
838
e704cb4b 839 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
840 start1 = (XINT (first) >> 16) & 1;
841 start2 = (XINT (first) >> 17) & 1;
842 end1 = (XINT (first) >> 18) & 1;
843 end2 = (XINT (first) >> 19) & 1;
844 prefix = (XINT (first) >> 20) & 1;
845 comstyle = (XINT (first) >> 21) & 1;
8489eb67
RS
846
847 if ((int) code < 0 || (int) code >= (int) Smax)
848 {
849 insert_string ("invalid");
850 return;
851 }
852 desc = syntax_code_spec[(int) code];
853
854 str[0] = desc, str[1] = 0;
855 insert (str, 1);
856
93da5fff
KH
857 if (NILP (match_lisp))
858 insert (" ", 1);
859 else
860 insert_char (XINT (match_lisp));
8489eb67 861
8489eb67
RS
862 if (start1)
863 insert ("1", 1);
864 if (start2)
865 insert ("2", 1);
866
867 if (end1)
868 insert ("3", 1);
869 if (end2)
870 insert ("4", 1);
871
872 if (prefix)
873 insert ("p", 1);
e5d4f4dc
RS
874 if (comstyle)
875 insert ("b", 1);
8489eb67
RS
876
877 insert_string ("\twhich means: ");
878
0220c518 879 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
880 {
881 case Swhitespace:
882 insert_string ("whitespace"); break;
883 case Spunct:
884 insert_string ("punctuation"); break;
885 case Sword:
886 insert_string ("word"); break;
887 case Ssymbol:
888 insert_string ("symbol"); break;
889 case Sopen:
890 insert_string ("open"); break;
891 case Sclose:
892 insert_string ("close"); break;
893 case Squote:
894 insert_string ("quote"); break;
895 case Sstring:
896 insert_string ("string"); break;
897 case Smath:
898 insert_string ("math"); break;
899 case Sescape:
900 insert_string ("escape"); break;
901 case Scharquote:
902 insert_string ("charquote"); break;
903 case Scomment:
904 insert_string ("comment"); break;
905 case Sendcomment:
906 insert_string ("endcomment"); break;
907 default:
908 insert_string ("invalid");
909 return;
910 }
911
8ea151b2 912 if (!NILP (match_lisp))
8489eb67
RS
913 {
914 insert_string (", matches ");
8ea151b2 915 insert_char (XINT (match_lisp));
8489eb67
RS
916 }
917
918 if (start1)
919 insert_string (",\n\t is the first character of a comment-start sequence");
920 if (start2)
921 insert_string (",\n\t is the second character of a comment-start sequence");
922
923 if (end1)
924 insert_string (",\n\t is the first character of a comment-end sequence");
925 if (end2)
926 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
927 if (comstyle)
928 insert_string (" (comment style b)");
929
8489eb67
RS
930 if (prefix)
931 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
932
933 insert_string ("\n");
934}
935
340f92b5 936static Lisp_Object
8489eb67
RS
937describe_syntax_1 (vector)
938 Lisp_Object vector;
939{
940 struct buffer *old = current_buffer;
941 set_buffer_internal (XBUFFER (Vstandard_output));
e41819d4 942 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
93da5fff
KH
943 while (! NILP (XCHAR_TABLE (vector)->parent))
944 {
945 vector = XCHAR_TABLE (vector)->parent;
946 insert_string ("\nThe parent syntax table is:");
e41819d4
RS
947 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
948 (int *) 0, 0);
93da5fff
KH
949 }
950
36cd82fe 951 call0 (intern ("help-mode"));
8489eb67
RS
952 set_buffer_internal (old);
953 return Qnil;
954}
955
956DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
957 "Describe the syntax specifications in the syntax table.\n\
958The descriptions are inserted in a buffer, which is then displayed.")
959 ()
960{
961 internal_with_output_to_temp_buffer
962 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
963
964 return Qnil;
965}
966\f
195d1361
RS
967int parse_sexp_ignore_comments;
968
8489eb67
RS
969/* Return the position across COUNT words from FROM.
970 If that many words cannot be found before the end of the buffer, return 0.
971 COUNT negative means scan backward and stop at word beginning. */
972
973scan_words (from, count)
974 register int from, count;
975{
976 register int beg = BEGV;
977 register int end = ZV;
93da5fff
KH
978 register enum syntaxcode code;
979 int ch0, ch1;
980 int temp_pos;
8489eb67
RS
981
982 immediate_quit = 1;
983 QUIT;
984
195d1361
RS
985 SETUP_SYNTAX_TABLE (from, count);
986
8489eb67
RS
987 while (count > 0)
988 {
989 while (1)
990 {
991 if (from == end)
992 {
993 immediate_quit = 0;
994 return 0;
995 }
195d1361 996 UPDATE_SYNTAX_TABLE_FORWARD (from);
93da5fff
KH
997 ch0 = FETCH_CHAR (from);
998 code = SYNTAX (ch0);
999 INC_POS (from);
8489eb67
RS
1000 if (words_include_escapes
1001 && (code == Sescape || code == Scharquote))
1002 break;
1003 if (code == Sword)
1004 break;
8489eb67 1005 }
93da5fff
KH
1006 /* Now CH0 is a character which begins a word and FROM is the
1007 position of the next character. */
8489eb67
RS
1008 while (1)
1009 {
1010 if (from == end) break;
195d1361 1011 UPDATE_SYNTAX_TABLE_FORWARD (from);
93da5fff
KH
1012 ch1 = FETCH_CHAR (from);
1013 code = SYNTAX (ch1);
8489eb67
RS
1014 if (!(words_include_escapes
1015 && (code == Sescape || code == Scharquote)))
93da5fff 1016 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1017 break;
93da5fff
KH
1018 INC_POS (from);
1019 ch0 = ch1;
8489eb67
RS
1020 }
1021 count--;
1022 }
1023 while (count < 0)
1024 {
1025 while (1)
1026 {
1027 if (from == beg)
1028 {
1029 immediate_quit = 0;
1030 return 0;
1031 }
93da5fff 1032 DEC_POS (from);
195d1361 1033 UPDATE_SYNTAX_TABLE_BACKWARD (from);
93da5fff
KH
1034 ch1 = FETCH_CHAR (from);
1035 code = SYNTAX (ch1);
8489eb67
RS
1036 if (words_include_escapes
1037 && (code == Sescape || code == Scharquote))
1038 break;
1039 if (code == Sword)
1040 break;
8489eb67 1041 }
93da5fff
KH
1042 /* Now CH1 is a character which ends a word and FROM is the
1043 position of it. */
8489eb67
RS
1044 while (1)
1045 {
1046 if (from == beg) break;
93da5fff
KH
1047 temp_pos = from;
1048 DEC_POS (temp_pos);
195d1361 1049 UPDATE_SYNTAX_TABLE_BACKWARD (from);
93da5fff
KH
1050 ch0 = FETCH_CHAR (temp_pos);
1051 code = SYNTAX (ch0);
8489eb67
RS
1052 if (!(words_include_escapes
1053 && (code == Sescape || code == Scharquote)))
93da5fff 1054 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1055 break;
93da5fff
KH
1056 from = temp_pos;
1057 ch1 = ch0;
8489eb67
RS
1058 }
1059 count++;
1060 }
1061
1062 immediate_quit = 0;
1063
1064 return from;
1065}
1066
1067DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1068 "Move point forward ARG words (backward if ARG is negative).\n\
1069Normally returns t.\n\
1070If an edge of the buffer is reached, point is left there\n\
1071and nil is returned.")
1072 (count)
1073 Lisp_Object count;
1074{
1075 int val;
1076 CHECK_NUMBER (count, 0);
1077
6ec8bbd2 1078 if (!(val = scan_words (PT, XINT (count))))
8489eb67
RS
1079 {
1080 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1081 return Qnil;
1082 }
1083 SET_PT (val);
1084 return Qt;
1085}
1086\f
195d1361
RS
1087Lisp_Object skip_chars ();
1088
1089DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1090 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1091STRING is like the inside of a `[...]' in a regular expression\n\
1092except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1093Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1094With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1095Returns the distance traveled, either zero or positive.")
1096 (string, lim)
1097 Lisp_Object string, lim;
1098{
1099 return skip_chars (1, 0, string, lim);
1100}
1101
1102DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1103 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1104See `skip-chars-forward' for details.\n\
1105Returns the distance traveled, either zero or negative.")
1106 (string, lim)
1107 Lisp_Object string, lim;
1108{
1109 return skip_chars (0, 0, string, lim);
1110}
1111
1112DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1113 "Move point forward across chars in specified syntax classes.\n\
1114SYNTAX is a string of syntax code characters.\n\
1115Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1116If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1117This function returns the distance traveled, either zero or positive.")
1118 (syntax, lim)
1119 Lisp_Object syntax, lim;
1120{
1121 return skip_chars (1, 1, syntax, lim);
1122}
1123
1124DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1125 "Move point backward across chars in specified syntax classes.\n\
1126SYNTAX is a string of syntax code characters.\n\
1127Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1128If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1129This function returns the distance traveled, either zero or negative.")
1130 (syntax, lim)
1131 Lisp_Object syntax, lim;
1132{
1133 return skip_chars (0, 1, syntax, lim);
1134}
1135
1136Lisp_Object
1137skip_chars (forwardp, syntaxp, string, lim)
1138 int forwardp, syntaxp;
1139 Lisp_Object string, lim;
1140{
1141 register unsigned char *p, *pend;
1142 register unsigned int c;
7e68b0ea 1143 register int ch;
195d1361 1144 unsigned char fastmap[0400];
7e68b0ea
RS
1145 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1146 of which codes don't fit in FASTMAP. In that case, we set the
1147 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1148 and set the actual ranges of characters in CHAR_RANGES. In the
1149 form "X-Y" of STRING, both X and Y must belong to the same
1150 character set because a range striding across character sets is
1151 meaningless. */
1152 int *char_ranges
1153 = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1154 int n_char_ranges = 0;
195d1361
RS
1155 int negate = 0;
1156 register int i;
7e68b0ea 1157 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
195d1361
RS
1158
1159 CHECK_STRING (string, 0);
1160
1161 if (NILP (lim))
1162 XSETINT (lim, forwardp ? ZV : BEGV);
1163 else
1164 CHECK_NUMBER_COERCE_MARKER (lim, 1);
1165
1166 /* In any case, don't allow scan outside bounds of buffer. */
1167 /* jla turned this off, for no known reason.
1168 bfox turned the ZV part on, and rms turned the
1169 BEGV part back on. */
1170 if (XINT (lim) > ZV)
1171 XSETFASTINT (lim, ZV);
1172 if (XINT (lim) < BEGV)
1173 XSETFASTINT (lim, BEGV);
1174
1175 p = XSTRING (string)->data;
1176 pend = p + XSTRING (string)->size;
1177 bzero (fastmap, sizeof fastmap);
1178
1179 if (p != pend && *p == '^')
1180 {
1181 negate = 1; p++;
1182 }
1183
1184 /* Find the characters specified and set their elements of fastmap.
1185 If syntaxp, each character counts as itself.
1186 Otherwise, handle backslashes and ranges specially. */
1187
1188 while (p != pend)
1189 {
7e68b0ea
RS
1190 c = *p;
1191 if (multibyte)
1192 {
1193 ch = STRING_CHAR (p, pend - p);
1194 p += BYTES_BY_CHAR_HEAD (*p);
1195 }
1196 else
1197 {
1198 ch = c;
1199 p++;
1200 }
195d1361
RS
1201 if (syntaxp)
1202 fastmap[syntax_spec_code[c]] = 1;
1203 else
1204 {
1205 if (c == '\\')
1206 {
1207 if (p == pend) break;
1208 c = *p++;
1209 }
1210 if (p != pend && *p == '-')
1211 {
7e68b0ea
RS
1212 unsigned int ch2;
1213
195d1361
RS
1214 p++;
1215 if (p == pend) break;
7e68b0ea
RS
1216 if (SINGLE_BYTE_CHAR_P (ch))
1217 while (c <= *p)
1218 {
1219 fastmap[c] = 1;
1220 c++;
1221 }
1222 else
195d1361 1223 {
7e68b0ea
RS
1224 fastmap[c] = 1; /* C is the base leading-code. */
1225 ch2 = STRING_CHAR (p, pend - p);
1226 if (ch <= ch2)
1227 char_ranges[n_char_ranges++] = ch,
1228 char_ranges[n_char_ranges++] = ch2;
195d1361 1229 }
7e68b0ea 1230 p += multibyte ? BYTES_BY_CHAR_HEAD (*p) : 1;
195d1361
RS
1231 }
1232 else
7e68b0ea
RS
1233 {
1234 fastmap[c] = 1;
1235 if (!SINGLE_BYTE_CHAR_P (ch))
1236 {
1237 char_ranges[n_char_ranges++] = ch;
1238 char_ranges[n_char_ranges++] = ch;
1239 }
1240 }
195d1361
RS
1241 }
1242 }
1243
7e68b0ea
RS
1244 /* If ^ was the first character, complement the fastmap. In
1245 addition, as all multibyte characters have possibility of
1246 matching, set all entries for base leading codes, which is
1247 harmless even if SYNTAXP is 1. */
195d1361
RS
1248
1249 if (negate)
1250 for (i = 0; i < sizeof fastmap; i++)
7e68b0ea
RS
1251 {
1252 if (!multibyte || !BASE_LEADING_CODE_P (i))
1253 fastmap[i] ^= 1;
1254 else
1255 fastmap[i] = 1;
1256 }
195d1361
RS
1257
1258 {
1259 int start_point = PT;
1260 int pos = PT;
1261
1262 immediate_quit = 1;
1263 if (syntaxp)
1264 {
1265 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1266 if (forwardp)
1267 {
7e68b0ea 1268 if (multibyte)
195d1361 1269 {
7e68b0ea
RS
1270 while (pos < XINT (lim)
1271 && fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1272 {
1273 INC_POS (pos);
1274 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1275 }
1276 }
1277 else
1278 {
1279 while (pos < XINT (lim)
1280 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1281 {
1282 pos++;
1283 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1284 }
195d1361
RS
1285 }
1286 }
1287 else
1288 {
7e68b0ea 1289 if (multibyte)
195d1361 1290 {
7e68b0ea
RS
1291 while (pos > XINT (lim))
1292 {
1293 int savepos = pos;
1294 DEC_POS (pos);
4c8535eb 1295 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
7e68b0ea
RS
1296 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1297 {
1298 pos = savepos;
1299 break;
1300 }
7e68b0ea
RS
1301 }
1302 }
1303 else
1304 {
4c8535eb 1305 while (pos > XINT (lim))
7e68b0ea
RS
1306 {
1307 pos--;
4c8535eb
RS
1308 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1309 if (!fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1310 {
1311 pos++;
1312 break;
1313 }
7e68b0ea 1314 }
195d1361
RS
1315 }
1316 }
1317 }
1318 else
1319 {
1320 if (forwardp)
1321 {
7e68b0ea
RS
1322 if (multibyte)
1323 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos))])
1324 {
1325 if (!BASE_LEADING_CODE_P (c))
1326 pos++;
1327 else if (n_char_ranges)
1328 {
1329 /* We much check CHAR_RANGES for a multibyte
1330 character. */
1331 ch = FETCH_MULTIBYTE_CHAR (pos);
1332 for (i = 0; i < n_char_ranges; i += 2)
1333 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1334 break;
1335 if (!(negate ^ (i < n_char_ranges)))
1336 break;
1337
1338 INC_POS (pos);
1339 }
1340 else
1341 {
1342 if (!negate) break;
1343 INC_POS (pos);
1344 }
1345 }
1346 else
1347 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1348 pos++;
195d1361
RS
1349 }
1350 else
1351 {
7e68b0ea
RS
1352 if (multibyte)
1353 while (pos > XINT (lim))
1354 {
1355 int savepos = pos;
1356 DEC_POS (pos);
1357 if (fastmap[(c = FETCH_BYTE (pos))])
1358 {
1359 if (!BASE_LEADING_CODE_P (c))
1360 ;
1361 else if (n_char_ranges)
1362 {
1363 /* We much check CHAR_RANGES for a multibyte
1364 character. */
1365 ch = FETCH_MULTIBYTE_CHAR (pos);
1366 for (i = 0; i < n_char_ranges; i += 2)
1367 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1368 break;
1369 if (!(negate ^ (i < n_char_ranges)))
1370 {
1371 pos = savepos;
1372 break;
1373 }
1374 }
1375 else
1376 if (!negate)
1377 {
1378 pos = savepos;
1379 break;
1380 }
1381 }
1382 else
1383 {
1384 pos = savepos;
1385 break;
1386 }
1387 }
1388 else
1389 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1390 pos--;
195d1361
RS
1391 }
1392 }
7e68b0ea
RS
1393
1394 if (multibyte
1395 /* INC_POS or DEC_POS might have moved POS over LIM. */
1396 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1397 pos = XINT (lim);
1398
195d1361
RS
1399 SET_PT (pos);
1400 immediate_quit = 0;
1401
1402 return make_number (PT - start_point);
1403 }
1404}
1405\f
b3cfe0c8
RS
1406DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1407 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 1408Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 1409Set point to where scanning stops.\n\
b3cfe0c8
RS
1410If N comments are found as expected, with nothing except whitespace\n\
1411between them, return t; otherwise return nil.")
1412 (count)
840f481c 1413 Lisp_Object count;
b3cfe0c8
RS
1414{
1415 register int from;
1416 register int stop;
8ea151b2 1417 register int c, c1;
b3cfe0c8
RS
1418 register enum syntaxcode code;
1419 int comstyle = 0; /* style of comment encountered */
be720845 1420 int found;
840f481c 1421 int count1;
93da5fff 1422 int temp_pos;
840f481c
RS
1423
1424 CHECK_NUMBER (count, 0);
1425 count1 = XINT (count);
195d1361 1426 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
1427
1428 immediate_quit = 1;
1429 QUIT;
1430
1431 from = PT;
1432
195d1361 1433 SETUP_SYNTAX_TABLE (from, count1);
840f481c 1434 while (count1 > 0)
b3cfe0c8 1435 {
04882296 1436 do
b3cfe0c8 1437 {
04882296
KH
1438 if (from == stop)
1439 {
1440 SET_PT (from);
b7e6e612 1441 immediate_quit = 0;
04882296
KH
1442 return Qnil;
1443 }
195d1361 1444 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8
RS
1445 c = FETCH_CHAR (from);
1446 code = SYNTAX (c);
93da5fff 1447 INC_POS (from);
b3cfe0c8
RS
1448 comstyle = 0;
1449 if (from < stop && SYNTAX_COMSTART_FIRST (c)
8ea151b2
RS
1450 && (c1 = FETCH_CHAR (from),
1451 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 1452 {
7fc8191e
RS
1453 /* We have encountered a comment start sequence and we
1454 are ignoring all text inside comments. We must record
b3cfe0c8
RS
1455 the comment style this sequence begins so that later,
1456 only a comment end of the same style actually ends
7fc8191e 1457 the comment section. */
b3cfe0c8 1458 code = Scomment;
8ea151b2 1459 comstyle = SYNTAX_COMMENT_STYLE (c1);
93da5fff 1460 INC_POS (from);
b3cfe0c8 1461 }
04882296
KH
1462 }
1463 while (code == Swhitespace || code == Sendcomment);
195d1361 1464 if (code != Scomment && code != Scomment_fence)
04882296
KH
1465 {
1466 immediate_quit = 0;
93da5fff
KH
1467 DEC_POS (from);
1468 SET_PT (from);
04882296
KH
1469 return Qnil;
1470 }
1471 /* We're at the start of a comment. */
1472 while (1)
1473 {
1474 if (from == stop)
b3cfe0c8
RS
1475 {
1476 immediate_quit = 0;
04882296 1477 SET_PT (from);
b3cfe0c8
RS
1478 return Qnil;
1479 }
195d1361 1480 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 1481 c = FETCH_CHAR (from);
93da5fff 1482 INC_POS (from);
04882296
KH
1483 if (SYNTAX (c) == Sendcomment
1484 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1485 /* we have encountered a comment end of the same style
1486 as the comment sequence which began this comment
1487 section */
1488 break;
195d1361
RS
1489 if (SYNTAX (c) == Scomment_fence
1490 && comstyle == ST_COMMENT_STYLE)
1491 /* we have encountered a comment end of the same style
1492 as the comment sequence which began this comment
1493 section. */
1494 break;
04882296 1495 if (from < stop && SYNTAX_COMEND_FIRST (c)
8ea151b2
RS
1496 && (c1 = FETCH_CHAR (from),
1497 SYNTAX_COMEND_SECOND (c1))
04882296
KH
1498 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1499 /* we have encountered a comment end of the same style
1500 as the comment sequence which began this comment
1501 section */
93da5fff 1502 { INC_POS (from); break; }
b3cfe0c8 1503 }
04882296 1504 /* We have skipped one comment. */
840f481c 1505 count1--;
b3cfe0c8
RS
1506 }
1507
840f481c 1508 while (count1 < 0)
b3cfe0c8 1509 {
b3cfe0c8
RS
1510 while (from > stop)
1511 {
1512 int quoted;
1513
93da5fff 1514 DEC_POS (from);
b3cfe0c8
RS
1515 quoted = char_quoted (from);
1516 if (quoted)
195d1361
RS
1517 {
1518 DEC_POS (from);
1519 goto leave; /* ????? XXXXX */
1520 }
1521 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b3cfe0c8
RS
1522 c = FETCH_CHAR (from);
1523 code = SYNTAX (c);
1524 comstyle = 0;
7fc8191e
RS
1525 if (code == Sendcomment)
1526 comstyle = SYNTAX_COMMENT_STYLE (c);
93da5fff
KH
1527 temp_pos = from;
1528 DEC_POS (temp_pos);
b3cfe0c8 1529 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff 1530 && (c1 = FETCH_CHAR (temp_pos),
8ea151b2 1531 SYNTAX_COMEND_FIRST (c1))
93da5fff 1532 && !char_quoted (temp_pos))
b3cfe0c8 1533 {
7fc8191e 1534 /* We must record the comment style encountered so that
b3cfe0c8 1535 later, we can match only the proper comment begin
7fc8191e 1536 sequence of the same style. */
b3cfe0c8 1537 code = Sendcomment;
8ea151b2 1538 comstyle = SYNTAX_COMMENT_STYLE (c1);
93da5fff 1539 from = temp_pos;
b3cfe0c8 1540 }
ae03af34
RS
1541 if (from > stop && SYNTAX_COMSTART_SECOND (c)
1542 && (c1 = FETCH_CHAR (temp_pos),
1543 SYNTAX_COMSTART_FIRST (c1))
1544 && !char_quoted (temp_pos))
1545 {
1546 /* We must record the comment style encountered so that
1547 later, we can match only the proper comment begin
1548 sequence of the same style. */
1549 code = Scomment;
1550 from = temp_pos;
1551 }
b3cfe0c8 1552
195d1361
RS
1553 if (code == Scomment_fence)
1554 {
1555 /* Skip until first preceding unquoted comment_fence. */
1556 int found = 0, ini = from;
1557
1558 while (--from != stop)
1559 {
1560 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1561 c = FETCH_CHAR (from);
1562 if (SYNTAX (c) == Scomment_fence && !char_quoted (from))
1563 {
1564 found = 1;
1565 break;
1566 }
1567 }
1568 if (found == 0)
1569 {
1570 from = ini; /* Set point to ini + 1. */
1571 goto leave;
1572 }
1573 }
1574 else if (code == Sendcomment)
b3cfe0c8 1575 {
8f9dc2ed 1576#if 0
b3cfe0c8
RS
1577 if (code != SYNTAX (c))
1578 /* For a two-char comment ender, we can assume
1579 it does end a comment. So scan back in a simple way. */
1580 {
93da5fff 1581 if (from != stop) DEC_POS (from);
b3cfe0c8
RS
1582 while (1)
1583 {
8ea151b2
RS
1584 if ((c = FETCH_CHAR (from),
1585 SYNTAX (c) == Scomment)
b3cfe0c8
RS
1586 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1587 break;
1588 if (from == stop)
1589 {
1590 immediate_quit = 0;
c81a3712 1591 SET_PT (from);
b3cfe0c8
RS
1592 return Qnil;
1593 }
93da5fff 1594 DEC_POS (from);
b3cfe0c8 1595 if (SYNTAX_COMSTART_SECOND (c)
8ea151b2
RS
1596 && (c1 = FETCH_CHAR (from),
1597 SYNTAX_COMSTART_FIRST (c1))
b3cfe0c8
RS
1598 && SYNTAX_COMMENT_STYLE (c) == comstyle
1599 && !char_quoted (from))
1600 break;
1601 }
1602 break;
1603 }
8f9dc2ed 1604#endif /* 0 */
ea315ed6 1605 found = back_comment (from, stop, comstyle);
195d1361
RS
1606 if (found != -1) from = found;
1607#if 0
b3cfe0c8
RS
1608 /* Look back, counting the parity of string-quotes,
1609 and recording the comment-starters seen.
1610 When we reach a safe place, assume that's not in a string;
1611 then step the main scan to the earliest comment-starter seen
1612 an even number of string quotes away from the safe place.
1613
1614 OFROM[I] is position of the earliest comment-starter seen
1615 which is I+2X quotes from the comment-end.
1616 PARITY is current parity of quotes from the comment end. */
1617 {
1618 int parity = 0;
1619 char my_stringend = 0;
1620 int string_lossage = 0;
1621 int comment_end = from;
1622 int comstart_pos = 0;
1623 int comstart_parity = 0;
93da5fff 1624 int scanstart = from;
b3cfe0c8 1625
93da5fff 1626 DEC_POS (scanstart);
b3cfe0c8
RS
1627 /* At beginning of range to scan, we're outside of strings;
1628 that determines quote parity to the comment-end. */
1629 while (from != stop)
1630 {
1631 /* Move back and examine a character. */
93da5fff 1632 DEC_POS (from);
b3cfe0c8 1633
195d1361 1634 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b3cfe0c8
RS
1635 c = FETCH_CHAR (from);
1636 code = SYNTAX (c);
1637
1638 /* If this char is the second of a 2-char comment sequence,
1639 back up and give the pair the appropriate syntax. */
93da5fff
KH
1640 temp_pos = from;
1641 DEC_POS (temp_pos);
b3cfe0c8 1642 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff
KH
1643 && (c1 = FETCH_CHAR (temp_pos),
1644 SYNTAX_COMEND_FIRST (c1)))
b3cfe0c8
RS
1645 {
1646 code = Sendcomment;
93da5fff
KH
1647 from = temp_pos;
1648 c = c1;
b3cfe0c8
RS
1649 }
1650
93da5fff
KH
1651 temp_pos = from;
1652 INC_POS (temp_pos);
4841063f
RS
1653 /* If this char starts a 2-char comment start sequence,
1654 treat it like a 1-char comment starter. */
1655 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
93da5fff
KH
1656 && (c1 = FETCH_CHAR (temp_pos),
1657 SYNTAX_COMSTART_SECOND (c1))
1658 && comstyle == SYNTAX_COMMENT_STYLE (c1))
4841063f 1659 code = Scomment;
b3cfe0c8
RS
1660
1661 /* Ignore escaped characters. */
1662 if (char_quoted (from))
1663 continue;
1664
1665 /* Track parity of quotes. */
1666 if (code == Sstring)
1667 {
1668 parity ^= 1;
1669 if (my_stringend == 0)
1670 my_stringend = c;
1671 /* If we have two kinds of string delimiters.
1672 There's no way to grok this scanning backwards. */
1673 else if (my_stringend != c)
1674 string_lossage = 1;
1675 }
1676
1677 /* Record comment-starters according to that
1678 quote-parity to the comment-end. */
1679 if (code == Scomment)
1680 {
1681 comstart_parity = parity;
1682 comstart_pos = from;
1683 }
1684
1685 /* If we find another earlier comment-ender,
eb8c3be9 1686 any comment-starts earlier than that don't count
b3cfe0c8
RS
1687 (because they go with the earlier comment-ender). */
1688 if (code == Sendcomment
1689 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1690 break;
1691
1692 /* Assume a defun-start point is outside of strings. */
1693 if (code == Sopen
93da5fff 1694 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
b3cfe0c8
RS
1695 break;
1696 }
1697
1698 if (comstart_pos == 0)
1699 from = comment_end;
1700 /* If the earliest comment starter
1701 is followed by uniform paired string quotes or none,
1702 we know it can't be inside a string
1703 since if it were then the comment ender would be inside one.
1704 So it does start a comment. Skip back to it. */
1705 else if (comstart_parity == 0 && !string_lossage)
1706 from = comstart_pos;
1707 else
1708 {
1709 /* We had two kinds of string delimiters mixed up
1710 together. Decode this going forwards.
1711 Scan fwd from the previous comment ender
1712 to the one in question; this records where we
1713 last passed a comment starter. */
1714 struct lisp_parse_state state;
1715 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 1716 comment_end - 1, -10000, 0, Qnil, 0);
b3cfe0c8 1717 if (state.incomment)
195d1361 1718 from = state.comstr_start;
b3cfe0c8
RS
1719 else
1720 /* We can't grok this as a comment; scan it normally. */
1721 from = comment_end;
1722 }
1723 }
195d1361 1724#endif /* 0 */
7fc8191e
RS
1725 /* We have skipped one comment. */
1726 break;
b3cfe0c8 1727 }
195d1361 1728 else if (code != Swhitespace && code != Scomment)
b3cfe0c8 1729 {
195d1361 1730 leave:
b3cfe0c8 1731 immediate_quit = 0;
93da5fff
KH
1732 INC_POS (from);
1733 SET_PT (from);
b3cfe0c8
RS
1734 return Qnil;
1735 }
1736 }
1737
840f481c 1738 count1++;
b3cfe0c8
RS
1739 }
1740
1741 SET_PT (from);
1742 immediate_quit = 0;
1743 return Qt;
1744}
1745\f
8489eb67
RS
1746Lisp_Object
1747scan_lists (from, count, depth, sexpflag)
1748 register int from;
1749 int count, depth, sexpflag;
1750{
1751 Lisp_Object val;
195d1361 1752 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
1753 register int c, c1;
1754 int stringterm;
8489eb67
RS
1755 int quoted;
1756 int mathexit = 0;
93da5fff 1757 register enum syntaxcode code, temp_code;
195d1361 1758 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 1759 int comstyle = 0; /* style of comment encountered */
93da5fff 1760 int temp_pos;
7bf5e9e4 1761 int last_good = from;
195d1361 1762 int found;
8489eb67
RS
1763
1764 if (depth > 0) min_depth = 0;
1765
1766 immediate_quit = 1;
1767 QUIT;
1768
195d1361 1769 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
1770 while (count > 0)
1771 {
8489eb67
RS
1772 while (from < stop)
1773 {
195d1361 1774 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67 1775 c = FETCH_CHAR (from);
e5d4f4dc 1776 code = SYNTAX (c);
7bf5e9e4
RS
1777 if (depth == min_depth)
1778 last_good = from;
93da5fff 1779 INC_POS (from);
195d1361 1780 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
1781 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1782 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1783 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1784 {
1785 /* we have encountered a comment start sequence and we
195d1361 1786 are ignoring all text inside comments. We must record
e5d4f4dc
RS
1787 the comment style this sequence begins so that later,
1788 only a comment end of the same style actually ends
1789 the comment section */
1790 code = Scomment;
1791 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
93da5fff 1792 INC_POS (from);
e5d4f4dc
RS
1793 }
1794
195d1361 1795 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
1796 if (SYNTAX_PREFIX (c))
1797 continue;
1798
0220c518 1799 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1800 {
1801 case Sescape:
1802 case Scharquote:
1803 if (from == stop) goto lose;
93da5fff 1804 INC_POS (from);
8489eb67
RS
1805 /* treat following character as a word constituent */
1806 case Sword:
1807 case Ssymbol:
1808 if (depth || !sexpflag) break;
195d1361 1809 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
1810 while (from < stop)
1811 {
195d1361 1812 UPDATE_SYNTAX_TABLE_FORWARD (from);
0220c518 1813 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1814 {
1815 case Scharquote:
1816 case Sescape:
93da5fff 1817 INC_POS (from);
8489eb67
RS
1818 if (from == stop) goto lose;
1819 break;
1820 case Sword:
1821 case Ssymbol:
1822 case Squote:
1823 break;
1824 default:
1825 goto done;
1826 }
93da5fff 1827 INC_POS (from);
8489eb67
RS
1828 }
1829 goto done;
1830
1831 case Scomment:
195d1361 1832 case Scomment_fence:
8489eb67
RS
1833 if (!parse_sexp_ignore_comments) break;
1834 while (1)
1835 {
c01f7ec0
RS
1836 if (from == stop)
1837 {
1838 if (depth == 0)
1839 goto done;
1840 goto lose;
1841 }
195d1361 1842 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 1843 c = FETCH_CHAR (from);
195d1361
RS
1844 if (code == Scomment
1845 ? (SYNTAX (c) == Sendcomment
1846 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1847 : (SYNTAX (c) == Scomment_fence))
e5d4f4dc
RS
1848 /* we have encountered a comment end of the same style
1849 as the comment sequence which began this comment
1850 section */
8489eb67 1851 break;
93da5fff 1852 INC_POS (from);
8489eb67 1853 if (from < stop && SYNTAX_COMEND_FIRST (c)
e5d4f4dc 1854 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
195d1361
RS
1855 && SYNTAX_COMMENT_STYLE (c) == comstyle
1856 && code == Scomment)
e5d4f4dc
RS
1857 /* we have encountered a comment end of the same style
1858 as the comment sequence which began this comment
1859 section */
93da5fff 1860 { INC_POS (from); break; }
8489eb67
RS
1861 }
1862 break;
1863
1864 case Smath:
1865 if (!sexpflag)
1866 break;
1867 if (from != stop && c == FETCH_CHAR (from))
93da5fff 1868 INC_POS (from);
8489eb67
RS
1869 if (mathexit)
1870 {
1871 mathexit = 0;
1872 goto close1;
1873 }
1874 mathexit = 1;
1875
1876 case Sopen:
1877 if (!++depth) goto done;
1878 break;
1879
1880 case Sclose:
1881 close1:
1882 if (!--depth) goto done;
1883 if (depth < min_depth)
7bf5e9e4
RS
1884 Fsignal (Qscan_error,
1885 Fcons (build_string ("Containing expression ends prematurely"),
1886 Fcons (make_number (last_good),
1887 Fcons (make_number (from), Qnil))));
8489eb67
RS
1888 break;
1889
1890 case Sstring:
195d1361 1891 case Sstring_fence:
93da5fff
KH
1892 temp_pos = from;
1893 DEC_POS (temp_pos);
1894 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
1895 while (1)
1896 {
1897 if (from >= stop) goto lose;
195d1361
RS
1898 UPDATE_SYNTAX_TABLE_FORWARD (from);
1899 if (code == Sstring
1900 ? (FETCH_CHAR (from) == stringterm)
1901 : SYNTAX (FETCH_CHAR (from)) == Sstring_fence)
1902 break;
0220c518 1903 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1904 {
1905 case Scharquote:
1906 case Sescape:
93da5fff 1907 INC_POS (from);
8489eb67 1908 }
93da5fff 1909 INC_POS (from);
8489eb67 1910 }
93da5fff 1911 INC_POS (from);
8489eb67
RS
1912 if (!depth && sexpflag) goto done;
1913 break;
1914 }
1915 }
1916
1917 /* Reached end of buffer. Error if within object, return nil if between */
1918 if (depth) goto lose;
1919
1920 immediate_quit = 0;
1921 return Qnil;
1922
1923 /* End of object reached */
1924 done:
1925 count--;
1926 }
1927
1928 while (count < 0)
1929 {
8489eb67
RS
1930 while (from > stop)
1931 {
93da5fff 1932 DEC_POS (from);
195d1361 1933 UPDATE_SYNTAX_TABLE_BACKWARD (from);
8489eb67 1934 if (quoted = char_quoted (from))
195d1361
RS
1935 {
1936 DEC_POS (from);
1937 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1938 }
8489eb67
RS
1939 c = FETCH_CHAR (from);
1940 code = SYNTAX (c);
7bf5e9e4
RS
1941 if (depth == min_depth)
1942 last_good = from;
7fc8191e
RS
1943 comstyle = 0;
1944 if (code == Sendcomment)
1945 comstyle = SYNTAX_COMMENT_STYLE (c);
93da5fff
KH
1946 temp_pos = from;
1947 DEC_POS (temp_pos);
8489eb67 1948 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff
KH
1949 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1950 && !char_quoted (temp_pos)
8489eb67 1951 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1952 {
1953 /* we must record the comment style encountered so that
1954 later, we can match only the proper comment begin
1955 sequence of the same style */
1956 code = Sendcomment;
93da5fff
KH
1957 comstyle = SYNTAX_COMMENT_STYLE (c1);
1958 from = temp_pos;
e5d4f4dc
RS
1959 }
1960
8489eb67
RS
1961 if (SYNTAX_PREFIX (c))
1962 continue;
1963
0220c518 1964 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
8489eb67
RS
1965 {
1966 case Sword:
1967 case Ssymbol:
1968 if (depth || !sexpflag) break;
195d1361
RS
1969 /* This word counts as a sexp; count object finished
1970 after passing it. */
8489eb67
RS
1971 while (from > stop)
1972 {
93da5fff
KH
1973 temp_pos = from;
1974 DEC_POS (temp_pos);
195d1361 1975 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff 1976 quoted = char_quoted (temp_pos);
8489eb67 1977 if (quoted)
93da5fff
KH
1978 {
1979 from = temp_pos;
1980 DEC_POS (temp_pos);
195d1361 1981 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff
KH
1982 }
1983 c1 = FETCH_CHAR (temp_pos);
1984 temp_code = SYNTAX (c1);
1985 if (! (quoted || temp_code == Sword
1986 || temp_code == Ssymbol
1987 || temp_code == Squote))
8489eb67 1988 goto done2;
93da5fff 1989 from = temp_pos;
8489eb67
RS
1990 }
1991 goto done2;
1992
1993 case Smath:
1994 if (!sexpflag)
1995 break;
93da5fff
KH
1996 temp_pos = from;
1997 DEC_POS (temp_pos);
195d1361 1998 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff
KH
1999 if (from != stop && c == FETCH_CHAR (temp_pos))
2000 from = temp_pos;
8489eb67
RS
2001 if (mathexit)
2002 {
2003 mathexit = 0;
2004 goto open2;
2005 }
2006 mathexit = 1;
2007
2008 case Sclose:
2009 if (!++depth) goto done2;
2010 break;
2011
2012 case Sopen:
2013 open2:
2014 if (!--depth) goto done2;
2015 if (depth < min_depth)
7bf5e9e4
RS
2016 Fsignal (Qscan_error,
2017 Fcons (build_string ("Containing expression ends prematurely"),
2018 Fcons (make_number (last_good),
2019 Fcons (make_number (from), Qnil))));
8489eb67
RS
2020 break;
2021
2022 case Sendcomment:
2023 if (!parse_sexp_ignore_comments)
2024 break;
8f9dc2ed 2025#if 0
37bef230
RS
2026 if (code != SYNTAX (c))
2027 /* For a two-char comment ender, we can assume
2028 it does end a comment. So scan back in a simple way. */
2029 {
93da5fff 2030 if (from != stop) DEC_POS (from);
37bef230
RS
2031 while (1)
2032 {
2033 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
2034 && SYNTAX_COMMENT_STYLE (c) == comstyle)
2035 break;
c01f7ec0
RS
2036 if (from == stop)
2037 {
2038 if (depth == 0)
2039 goto done2;
2040 goto lose;
2041 }
93da5fff 2042 DEC_POS (from);
37bef230
RS
2043 if (SYNTAX_COMSTART_SECOND (c)
2044 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
2045 && SYNTAX_COMMENT_STYLE (c) == comstyle
2046 && !char_quoted (from))
2047 break;
2048 }
2049 break;
2050 }
8f9dc2ed 2051#endif /* 0 */
ea315ed6 2052 found = back_comment (from, stop, comstyle);
195d1361
RS
2053 if (found != -1) from = found;
2054#if 0
8489eb67
RS
2055 /* Look back, counting the parity of string-quotes,
2056 and recording the comment-starters seen.
2057 When we reach a safe place, assume that's not in a string;
2058 then step the main scan to the earliest comment-starter seen
2059 an even number of string quotes away from the safe place.
2060
2061 OFROM[I] is position of the earliest comment-starter seen
2062 which is I+2X quotes from the comment-end.
2063 PARITY is current parity of quotes from the comment end. */
2064 {
8489eb67 2065 int parity = 0;
e5d4f4dc
RS
2066 char my_stringend = 0;
2067 int string_lossage = 0;
2068 int comment_end = from;
37bef230
RS
2069 int comstart_pos = 0;
2070 int comstart_parity = 0;
93da5fff
KH
2071 int scanstart = from;
2072
2073 DEC_POS (scanstart);
8489eb67
RS
2074
2075 /* At beginning of range to scan, we're outside of strings;
2076 that determines quote parity to the comment-end. */
2077 while (from != stop)
2078 {
2079 /* Move back and examine a character. */
93da5fff 2080 DEC_POS (from);
8489eb67
RS
2081
2082 c = FETCH_CHAR (from);
2083 code = SYNTAX (c);
2084
2085 /* If this char is the second of a 2-char comment sequence,
2086 back up and give the pair the appropriate syntax. */
93da5fff
KH
2087 temp_pos = from;
2088 DEC_POS (temp_pos);
8489eb67 2089 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff
KH
2090 && (c1 = FETCH_CHAR (temp_pos),
2091 SYNTAX_COMEND_FIRST (c1)))
e5d4f4dc
RS
2092 {
2093 code = Sendcomment;
93da5fff
KH
2094 from = temp_pos;
2095 c = c1;
e5d4f4dc
RS
2096 }
2097
4841063f
RS
2098 /* If this char starts a 2-char comment start sequence,
2099 treat it like a 1-char comment starter. */
93da5fff
KH
2100 temp_pos = from;
2101 INC_POS (temp_pos);
4841063f 2102 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
93da5fff
KH
2103 && (c1 = FETCH_CHAR (temp_pos),
2104 SYNTAX_COMSTART_SECOND (c1))
2105 && comstyle == SYNTAX_COMMENT_STYLE (c1))
4841063f 2106 code = Scomment;
8489eb67
RS
2107
2108 /* Ignore escaped characters. */
2109 if (char_quoted (from))
2110 continue;
2111
37bef230 2112 /* Track parity of quotes. */
8489eb67 2113 if (code == Sstring)
e5d4f4dc
RS
2114 {
2115 parity ^= 1;
2116 if (my_stringend == 0)
2117 my_stringend = c;
37bef230 2118 /* If we have two kinds of string delimiters.
e5d4f4dc
RS
2119 There's no way to grok this scanning backwards. */
2120 else if (my_stringend != c)
2121 string_lossage = 1;
2122 }
8489eb67
RS
2123
2124 /* Record comment-starters according to that
2125 quote-parity to the comment-end. */
2126 if (code == Scomment)
37bef230
RS
2127 {
2128 comstart_parity = parity;
2129 comstart_pos = from;
2130 }
8489eb67 2131
37bef230 2132 /* If we find another earlier comment-ender,
eb8c3be9 2133 any comment-starts earlier than that don't count
37bef230 2134 (because they go with the earlier comment-ender). */
e5d4f4dc
RS
2135 if (code == Sendcomment
2136 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
8489eb67 2137 break;
37bef230
RS
2138
2139 /* Assume a defun-start point is outside of strings. */
2140 if (code == Sopen
93da5fff 2141 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
37bef230 2142 break;
8489eb67 2143 }
37bef230
RS
2144
2145 if (comstart_pos == 0)
2146 from = comment_end;
2147 /* If the earliest comment starter
2148 is followed by uniform paired string quotes or none,
2149 we know it can't be inside a string
2150 since if it were then the comment ender would be inside one.
2151 So it does start a comment. Skip back to it. */
2152 else if (comstart_parity == 0 && !string_lossage)
2153 from = comstart_pos;
2154 else
e5d4f4dc
RS
2155 {
2156 /* We had two kinds of string delimiters mixed up
2157 together. Decode this going forwards.
2158 Scan fwd from the previous comment ender
2159 to the one in question; this records where we
2160 last passed a comment starter. */
2161 struct lisp_parse_state state;
37bef230 2162 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 2163 comment_end - 1, -10000, 0, Qnil, 0);
e5d4f4dc 2164 if (state.incomment)
195d1361 2165 from = state.comstr_start;
e5d4f4dc
RS
2166 else
2167 /* We can't grok this as a comment; scan it normally. */
2168 from = comment_end;
2169 }
8489eb67 2170 }
195d1361 2171#endif /* 0 */
8489eb67
RS
2172 break;
2173
195d1361
RS
2174 case Scomment_fence:
2175 case Sstring_fence:
2176 while (1)
2177 {
2178 DEC_POS (from);
2179 if (from == stop) goto lose;
2180 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2181 if (!char_quoted (from)
2182 && SYNTAX (FETCH_CHAR (from)) == code)
2183 break;
2184 }
2185 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2186 break;
2187
8489eb67
RS
2188 case Sstring:
2189 stringterm = FETCH_CHAR (from);
2190 while (1)
2191 {
2192 if (from == stop) goto lose;
93da5fff
KH
2193 temp_pos = from;
2194 DEC_POS (temp_pos);
195d1361 2195 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff
KH
2196 if (!char_quoted (temp_pos)
2197 && stringterm == FETCH_CHAR (temp_pos))
8489eb67 2198 break;
93da5fff 2199 from = temp_pos;
8489eb67 2200 }
93da5fff 2201 DEC_POS (from);
8489eb67
RS
2202 if (!depth && sexpflag) goto done2;
2203 break;
2204 }
2205 }
2206
2207 /* Reached start of buffer. Error if within object, return nil if between */
2208 if (depth) goto lose;
2209
2210 immediate_quit = 0;
2211 return Qnil;
2212
2213 done2:
2214 count++;
2215 }
2216
2217
2218 immediate_quit = 0;
1e142fb7 2219 XSETFASTINT (val, from);
8489eb67
RS
2220 return val;
2221
2222 lose:
7bf5e9e4
RS
2223 Fsignal (Qscan_error,
2224 Fcons (build_string ("Unbalanced parentheses"),
2225 Fcons (make_number (last_good),
2226 Fcons (make_number (from), Qnil))));
2227
8489eb67
RS
2228 /* NOTREACHED */
2229}
2230
8489eb67
RS
2231DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2232 "Scan from character number FROM by COUNT lists.\n\
2233Returns the character number of the position thus found.\n\
2234\n\
2235If DEPTH is nonzero, paren depth begins counting from that value,\n\
2236only places where the depth in parentheses becomes zero\n\
2237are candidates for stopping; COUNT such places are counted.\n\
2238Thus, a positive value for DEPTH means go out levels.\n\
2239\n\
2240Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2241\n\
2242If the beginning or end of (the accessible part of) the buffer is reached\n\
2243and the depth is wrong, an error is signaled.\n\
2244If the depth is right but the count is not used up, nil is returned.")
2245 (from, count, depth)
2246 Lisp_Object from, count, depth;
2247{
2248 CHECK_NUMBER (from, 0);
2249 CHECK_NUMBER (count, 1);
2250 CHECK_NUMBER (depth, 2);
2251
2252 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2253}
2254
2255DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2256 "Scan from character number FROM by COUNT balanced expressions.\n\
2257If COUNT is negative, scan backwards.\n\
2258Returns the character number of the position thus found.\n\
2259\n\
2260Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2261\n\
2262If the beginning or end of (the accessible part of) the buffer is reached\n\
2263in the middle of a parenthetical grouping, an error is signaled.\n\
2264If the beginning or end is reached between groupings\n\
2265but before count is used up, nil is returned.")
2266 (from, count)
2267 Lisp_Object from, count;
2268{
2269 CHECK_NUMBER (from, 0);
2270 CHECK_NUMBER (count, 1);
2271
2272 return scan_lists (XINT (from), XINT (count), 0, 1);
2273}
2274
2275DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2276 0, 0, 0,
2277 "Move point backward over any number of chars with prefix syntax.\n\
2278This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2279 ()
2280{
2281 int beg = BEGV;
6ec8bbd2 2282 int pos = PT;
93da5fff
KH
2283 int c;
2284 int temp_pos = pos;
2285
195d1361
RS
2286 if (pos > beg)
2287 {
2288 SETUP_SYNTAX_TABLE (pos, -1);
2289 }
93da5fff 2290 DEC_POS (temp_pos);
8489eb67 2291
93da5fff 2292 while (pos > beg && !char_quoted (temp_pos)
195d1361 2293 /* Previous statement updates syntax table. */
93da5fff
KH
2294 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
2295 || SYNTAX_PREFIX (c)))
2296 {
2297 pos = temp_pos;
2298 DEC_POS (temp_pos);
2299 }
8489eb67
RS
2300
2301 SET_PT (pos);
2302
2303 return Qnil;
2304}
2305\f
8489eb67 2306/* Parse forward from FROM to END,
e5d4f4dc
RS
2307 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2308 and return a description of the state of the parse at END.
c81a3712
RS
2309 If STOPBEFORE is nonzero, stop at the start of an atom.
2310 If COMMENTSTOP is nonzero, stop at the start of a comment. */
8489eb67 2311
340f92b5 2312static void
c81a3712
RS
2313scan_sexps_forward (stateptr, from, end, targetdepth,
2314 stopbefore, oldstate, commentstop)
e5d4f4dc 2315 struct lisp_parse_state *stateptr;
8489eb67
RS
2316 register int from;
2317 int end, targetdepth, stopbefore;
2318 Lisp_Object oldstate;
c81a3712 2319 int commentstop;
8489eb67
RS
2320{
2321 struct lisp_parse_state state;
2322
2323 register enum syntaxcode code;
2324 struct level { int last, prev; };
2325 struct level levelstart[100];
2326 register struct level *curlevel = levelstart;
2327 struct level *endlevel = levelstart + 100;
93da5fff 2328 int prev;
8489eb67
RS
2329 register int depth; /* Paren depth of current scanning location.
2330 level - levelstart equals this except
2331 when the depth becomes negative. */
2332 int mindepth; /* Lowest DEPTH value seen. */
2333 int start_quoted = 0; /* Nonzero means starting after a char quote */
2334 Lisp_Object tem;
93da5fff 2335 int prev_from; /* Keep one character before FROM. */
195d1361
RS
2336 int boundary_stop = commentstop == -1;
2337 int nofence;
93da5fff
KH
2338
2339 prev_from = from;
2340 DEC_POS (prev_from);
2341
2342 /* Use this macro instead of `from++'. */
2343#define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
8489eb67
RS
2344
2345 immediate_quit = 1;
2346 QUIT;
2347
195d1361
RS
2348 SETUP_SYNTAX_TABLE (from, 1);
2349
265a9e55 2350 if (NILP (oldstate))
8489eb67
RS
2351 {
2352 depth = 0;
2353 state.instring = -1;
2354 state.incomment = 0;
195d1361
RS
2355 state.comstyle = 0; /* comment style a by default. */
2356 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2357 }
2358 else
2359 {
2360 tem = Fcar (oldstate);
265a9e55 2361 if (!NILP (tem))
8489eb67
RS
2362 depth = XINT (tem);
2363 else
2364 depth = 0;
2365
2366 oldstate = Fcdr (oldstate);
2367 oldstate = Fcdr (oldstate);
2368 oldstate = Fcdr (oldstate);
2369 tem = Fcar (oldstate);
195d1361
RS
2370 /* Check whether we are inside string_fence-style string: */
2371 state.instring = ( !NILP (tem)
2372 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2373 : -1);
8489eb67
RS
2374
2375 oldstate = Fcdr (oldstate);
2376 tem = Fcar (oldstate);
265a9e55 2377 state.incomment = !NILP (tem);
8489eb67
RS
2378
2379 oldstate = Fcdr (oldstate);
2380 tem = Fcar (oldstate);
265a9e55 2381 start_quoted = !NILP (tem);
e5d4f4dc
RS
2382
2383 /* if the eight element of the list is nil, we are in comment
195d1361
RS
2384 style a. If it is non-nil, we are in comment style b */
2385 oldstate = Fcdr (oldstate);
e5d4f4dc 2386 oldstate = Fcdr (oldstate);
195d1361
RS
2387 tem = Fcar (oldstate);
2388 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2389 ? ST_COMMENT_STYLE : 1 );
2390
e5d4f4dc 2391 oldstate = Fcdr (oldstate);
e5d4f4dc 2392 tem = Fcar (oldstate);
195d1361 2393 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
8489eb67
RS
2394 }
2395 state.quoted = 0;
2396 mindepth = depth;
2397
2398 curlevel->prev = -1;
2399 curlevel->last = -1;
2400
195d1361 2401 /* Enter the loop at a place appropriate for initial state. */
8489eb67
RS
2402
2403 if (state.incomment) goto startincomment;
2404 if (state.instring >= 0)
2405 {
195d1361 2406 nofence = state.instring != ST_STRING_STYLE;
8489eb67
RS
2407 if (start_quoted) goto startquotedinstring;
2408 goto startinstring;
2409 }
2410 if (start_quoted) goto startquoted;
2411
2412 while (from < end)
2413 {
195d1361 2414 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2415 code = SYNTAX (FETCH_CHAR (from));
93da5fff 2416 INC_FROM;
4c920633 2417
8f9dc2ed 2418 if (code == Scomment)
195d1361 2419 state.comstr_start = prev_from;
4c920633 2420 else if (code == Scomment_fence)
e5d4f4dc
RS
2421 {
2422 /* Record the comment style we have entered so that only
2423 the comment-end sequence of the same style actually
2424 terminates the comment section. */
195d1361
RS
2425 state.comstyle = ( code == Scomment_fence
2426 ? ST_COMMENT_STYLE
2427 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2428 state.comstr_start = prev_from;
2429 if (code != Scomment_fence) INC_FROM;
e5d4f4dc 2430 code = Scomment;
e5d4f4dc 2431 }
4c920633
RS
2432 else if (from < end)
2433 if (SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from)))
2434 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
2435 /* Duplicate code to avoid a very complex if-expression
2436 which causes trouble for the SGI compiler. */
2437 {
2438 /* Record the comment style we have entered so that only
2439 the comment-end sequence of the same style actually
2440 terminates the comment section. */
2441 state.comstyle = ( code == Scomment_fence
2442 ? ST_COMMENT_STYLE
2443 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2444 state.comstr_start = prev_from;
2445 if (code != Scomment_fence) INC_FROM;
2446 code = Scomment;
2447 }
e5d4f4dc 2448
93da5fff 2449 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
8489eb67 2450 continue;
0220c518 2451 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2452 {
2453 case Sescape:
2454 case Scharquote:
2455 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2456 curlevel->last = prev_from;
8489eb67
RS
2457 startquoted:
2458 if (from == end) goto endquoted;
93da5fff 2459 INC_FROM;
8489eb67
RS
2460 goto symstarted;
2461 /* treat following character as a word constituent */
2462 case Sword:
2463 case Ssymbol:
2464 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2465 curlevel->last = prev_from;
8489eb67
RS
2466 symstarted:
2467 while (from < end)
2468 {
195d1361 2469 UPDATE_SYNTAX_TABLE_FORWARD (from);
0220c518 2470 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
2471 {
2472 case Scharquote:
2473 case Sescape:
93da5fff 2474 INC_FROM;
8489eb67
RS
2475 if (from == end) goto endquoted;
2476 break;
2477 case Sword:
2478 case Ssymbol:
2479 case Squote:
2480 break;
2481 default:
2482 goto symdone;
2483 }
93da5fff 2484 INC_FROM;
8489eb67
RS
2485 }
2486 symdone:
2487 curlevel->prev = curlevel->last;
2488 break;
2489
5a28e48c 2490 startincomment:
195d1361 2491 if (commentstop == 1)
5a28e48c
RS
2492 goto done;
2493 if (from != BEGV)
2494 {
2495 /* Enter the loop in the middle so that we find
2496 a 2-char comment ender if we start in the middle of it. */
93da5fff 2497 prev = FETCH_CHAR (prev_from);
5a28e48c
RS
2498 goto startincomment_1;
2499 }
2500 /* At beginning of buffer, enter the loop the ordinary way. */
195d1361
RS
2501 state.incomment = 1;
2502 goto commentloop;
5a28e48c 2503
8489eb67
RS
2504 case Scomment:
2505 state.incomment = 1;
195d1361
RS
2506 if (commentstop || boundary_stop) goto done;
2507 commentloop:
8489eb67
RS
2508 while (1)
2509 {
2510 if (from == end) goto done;
195d1361 2511 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc
RS
2512 prev = FETCH_CHAR (from);
2513 if (SYNTAX (prev) == Sendcomment
2514 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2515 /* Only terminate the comment section if the endcomment
2516 of the same style as the start sequence has been
2517 encountered. */
8489eb67 2518 break;
195d1361
RS
2519 if (state.comstyle == ST_COMMENT_STYLE
2520 && SYNTAX (prev) == Scomment_fence)
2521 break;
93da5fff 2522 INC_FROM;
5a28e48c 2523 startincomment_1:
8489eb67 2524 if (from < end && SYNTAX_COMEND_FIRST (prev)
e5d4f4dc
RS
2525 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
2526 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2527 /* Only terminate the comment section if the end-comment
2528 sequence of the same style as the start sequence has
2529 been encountered. */
195d1361 2530 { break; }
8489eb67 2531 }
195d1361 2532 INC_FROM;
8489eb67 2533 state.incomment = 0;
e5d4f4dc 2534 state.comstyle = 0; /* reset the comment style */
195d1361 2535 if (boundary_stop) goto done;
8489eb67
RS
2536 break;
2537
2538 case Sopen:
2539 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2540 depth++;
2541 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2542 curlevel->last = prev_from;
8489eb67
RS
2543 if (++curlevel == endlevel)
2544 error ("Nesting too deep for parser");
2545 curlevel->prev = -1;
2546 curlevel->last = -1;
30844415 2547 if (targetdepth == depth) goto done;
8489eb67
RS
2548 break;
2549
2550 case Sclose:
2551 depth--;
2552 if (depth < mindepth)
2553 mindepth = depth;
2554 if (curlevel != levelstart)
2555 curlevel--;
2556 curlevel->prev = curlevel->last;
30844415 2557 if (targetdepth == depth) goto done;
8489eb67
RS
2558 break;
2559
2560 case Sstring:
195d1361
RS
2561 case Sstring_fence:
2562 state.comstr_start = from - 1;
8489eb67 2563 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2564 curlevel->last = prev_from;
195d1361
RS
2565 state.instring = (code == Sstring
2566 ? (FETCH_CHAR (prev_from))
2567 : ST_STRING_STYLE);
2568 if (boundary_stop) goto done;
8489eb67 2569 startinstring:
195d1361
RS
2570 {
2571 nofence = state.instring != ST_STRING_STYLE;
2572
2573 while (1)
2574 {
2575 int c;
2576
2577 if (from >= end) goto done;
2578 c = FETCH_CHAR (from);
2579 if (nofence && c == state.instring) break;
2580 UPDATE_SYNTAX_TABLE_FORWARD (from);
2581 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2582 {
2583 case Sstring_fence:
2584 if (!nofence) goto string_end;
2585 break;
2586 case Scharquote:
2587 case Sescape:
2588 INC_FROM;
2589 startquotedinstring:
2590 if (from >= end) goto endquoted;
2591 }
2592 INC_FROM;
2593 }
2594 }
2595 string_end:
8489eb67
RS
2596 state.instring = -1;
2597 curlevel->prev = curlevel->last;
93da5fff 2598 INC_FROM;
195d1361 2599 if (boundary_stop) goto done;
8489eb67
RS
2600 break;
2601
2602 case Smath:
2603 break;
2604 }
2605 }
2606 goto done;
2607
2608 stop: /* Here if stopping before start of sexp. */
93da5fff 2609 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2610 goto done; /* but return the position before it. */
2611
2612 endquoted:
2613 state.quoted = 1;
2614 done:
2615 state.depth = depth;
2616 state.mindepth = mindepth;
2617 state.thislevelstart = curlevel->prev;
2618 state.prevlevelstart
2619 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2620 state.location = from;
2621 immediate_quit = 0;
2622
e5d4f4dc 2623 *stateptr = state;
8489eb67
RS
2624}
2625
2626/* This comment supplies the doc string for parse-partial-sexp,
2627 for make-docfile to see. We cannot put this in the real DEFUN
2628 due to limits in the Unix cpp.
2629
c81a3712 2630DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
2631 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2632Parsing stops at TO or when certain criteria are met;\n\
2633 point is set to where parsing stops.\n\
2634If fifth arg STATE is omitted or nil,\n\
2635 parsing assumes that FROM is the beginning of a function.\n\
195d1361 2636Value is a list of nine elements describing final state of parsing:\n\
af50f9e5
RS
2637 0. depth in parens.\n\
2638 1. character address of start of innermost containing list; nil if none.\n\
2639 2. character address of start of last complete sexp terminated.\n\
2640 3. non-nil if inside a string.\n\
195d1361
RS
2641 (it is the character that will terminate the string,\n\
2642 or t if the string should be terminated by an explicit\n\
2643 `syntax-table' property.)\n\
af50f9e5
RS
2644 4. t if inside a comment.\n\
2645 5. t if following a quote character.\n\
2646 6. the minimum paren-depth encountered during this scan.\n\
195d1361
RS
2647 7. t if in a comment of style `b'; `syntax-table' if given by an explicit\n\
2648 `syntax-table' property.\n\
2649 8. character address of start of last comment or string; nil if none.\n\
8489eb67
RS
2650If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2651in parentheses becomes equal to TARGETDEPTH.\n\
2652Fourth arg STOPBEFORE non-nil means stop when come to\n\
2653 any character that starts a sexp.\n\
ec11639d 2654Fifth arg STATE is an eight-list like what this function returns.\n\
a4275ad1 2655It is used to initialize the state of the parse. Its second and third
c81a3712 2656elements are ignored.
195d1361
RS
2657Sixth arg COMMENTSTOP non-nil means stop at the start of a comment. If\n\
2658it is `syntax-table', stop after the start of a comment or a string, or\n\
2659after end of a comment or a string.")
c81a3712 2660 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
2661*/
2662
c81a3712 2663DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 2664 0 /* See immediately above */)
c81a3712
RS
2665 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2666 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
2667{
2668 struct lisp_parse_state state;
2669 int target;
2670
265a9e55 2671 if (!NILP (targetdepth))
8489eb67
RS
2672 {
2673 CHECK_NUMBER (targetdepth, 3);
2674 target = XINT (targetdepth);
2675 }
2676 else
2677 target = -100000; /* We won't reach this depth */
2678
2679 validate_region (&from, &to);
e5d4f4dc 2680 scan_sexps_forward (&state, XINT (from), XINT (to),
c81a3712 2681 target, !NILP (stopbefore), oldstate,
195d1361
RS
2682 (NILP (commentstop)
2683 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
2684
2685 SET_PT (state.location);
2686
2687 return Fcons (make_number (state.depth),
2688 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2689 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
195d1361
RS
2690 Fcons (state.instring >= 0
2691 ? (state.instring == ST_STRING_STYLE
2692 ? Qt : make_number (state.instring)) : Qnil,
8489eb67
RS
2693 Fcons (state.incomment ? Qt : Qnil,
2694 Fcons (state.quoted ? Qt : Qnil,
e5d4f4dc 2695 Fcons (make_number (state.mindepth),
195d1361
RS
2696 Fcons (state.comstyle
2697 ? (state.comstyle == ST_COMMENT_STYLE
2698 ? Qsyntax_table : Qt) : Qnil,
2699 Fcons (state.comstr_start != -1 ? make_number (state.comstr_start) : Qnil,
2700 Qnil)))))))));
8489eb67
RS
2701}
2702\f
2703init_syntax_once ()
2704{
78f9a1f7 2705 register int i, c;
8ea151b2 2706 Lisp_Object temp;
8489eb67 2707
5ebaddf5
RS
2708 /* This has to be done here, before we call Fmake_char_table. */
2709 Qsyntax_table = intern ("syntax-table");
2710 staticpro (&Qsyntax_table);
2711
2712 /* Intern this now in case it isn't already done.
2713 Setting this variable twice is harmless.
2714 But don't staticpro it here--that is done in alloc.c. */
2715 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2716
93da5fff
KH
2717 /* Create objects which can be shared among syntax tables. */
2718 Vsyntax_code_object = Fmake_vector (13, Qnil);
2719 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2720 XVECTOR (Vsyntax_code_object)->contents[i]
2721 = Fcons (make_number (i), Qnil);
2722
5ebaddf5
RS
2723 /* Now we are ready to set up this property, so we can
2724 create syntax tables. */
2725 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2726
93da5fff 2727 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 2728
5ebaddf5 2729 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 2730
93da5fff 2731 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 2732 for (i = 'a'; i <= 'z'; i++)
8ea151b2 2733 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2734 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 2735 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2736 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
2737 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2738
2739 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2740 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2741
2742 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2743 Fcons (make_number (Sopen), make_number (')')));
2744 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2745 Fcons (make_number (Sclose), make_number ('(')));
2746 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2747 Fcons (make_number (Sopen), make_number (']')));
2748 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2749 Fcons (make_number (Sclose), make_number ('[')));
2750 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2751 Fcons (make_number (Sopen), make_number ('}')));
2752 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2753 Fcons (make_number (Sclose), make_number ('{')));
2754 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2755 Fcons (make_number ((int) Sstring), Qnil));
2756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2757 Fcons (make_number ((int) Sescape), Qnil));
2758
93da5fff 2759 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 2760 for (i = 0; i < 10; i++)
78f9a1f7
KH
2761 {
2762 c = "_-+*/&|<>="[i];
2763 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2764 }
8489eb67 2765
93da5fff 2766 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 2767 for (i = 0; i < 12; i++)
78f9a1f7
KH
2768 {
2769 c = ".,;:?!#@~^'`"[i];
2770 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2771 }
8489eb67
RS
2772}
2773
2774syms_of_syntax ()
2775{
2776 Qsyntax_table_p = intern ("syntax-table-p");
2777 staticpro (&Qsyntax_table_p);
2778
93da5fff
KH
2779 staticpro (&Vsyntax_code_object);
2780
7bf5e9e4
RS
2781 Qscan_error = intern ("scan-error");
2782 staticpro (&Qscan_error);
2783 Fput (Qscan_error, Qerror_conditions,
2784 Fcons (Qerror, Qnil));
2785 Fput (Qscan_error, Qerror_message,
2786 build_string ("Scan error"));
2787
8489eb67
RS
2788 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2789 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2790
195d1361
RS
2791 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2792 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2793The value of this property should be either a syntax table, or a cons\n\
2794of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2795syntax code, MATCHCHAR being nil or the character to match (which is\n\
2796relevant only for open/close type.");
2797
8489eb67
RS
2798 words_include_escapes = 0;
2799 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2800 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2801
2802 defsubr (&Ssyntax_table_p);
2803 defsubr (&Ssyntax_table);
2804 defsubr (&Sstandard_syntax_table);
2805 defsubr (&Scopy_syntax_table);
2806 defsubr (&Sset_syntax_table);
2807 defsubr (&Schar_syntax);
beefa22e 2808 defsubr (&Smatching_paren);
8489eb67
RS
2809 defsubr (&Smodify_syntax_entry);
2810 defsubr (&Sdescribe_syntax);
2811
2812 defsubr (&Sforward_word);
2813
195d1361
RS
2814 defsubr (&Sskip_chars_forward);
2815 defsubr (&Sskip_chars_backward);
2816 defsubr (&Sskip_syntax_forward);
2817 defsubr (&Sskip_syntax_backward);
2818
b3cfe0c8 2819 defsubr (&Sforward_comment);
8489eb67
RS
2820 defsubr (&Sscan_lists);
2821 defsubr (&Sscan_sexps);
2822 defsubr (&Sbackward_prefix_chars);
2823 defsubr (&Sparse_partial_sexp);
2824}