(looking_at_1): Use bytepos to call re_search_2.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
b745025d 2 Copyright (C) 1985, 87, 93, 94, 95, 1997 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67
RS
23#include <ctype.h>
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
93da5fff 27#include "charset.h"
195d1361
RS
28
29/* Make syntax table lookup grant data in gl_state. */
30#define SYNTAX_ENTRY_VIA_PROPERTY
31
8489eb67 32#include "syntax.h"
195d1361
RS
33#include "intervals.h"
34
35/* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39#define ST_COMMENT_STYLE (256 + 1)
40#define ST_STRING_STYLE (256 + 2)
93da5fff 41#include "category.h"
8489eb67 42
7bf5e9e4 43Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67 44
340f92b5
RS
45static void scan_sexps_forward ();
46static int char_quoted ();
47
8489eb67 48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
8ea151b2
RS
51/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54Lisp_Object syntax_temp;
55
e5d4f4dc
RS
56/* This is the internal form of the parse state used in parse-partial-sexp. */
57
58struct lisp_parse_state
59 {
195d1361
RS
60 int depth; /* Depth at end of parsing. */
61 int instring; /* -1 if not within string, else desired terminator. */
62 int incomment; /* Nonzero if within a comment at end of parsing. */
63 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
64 int quoted; /* Nonzero if just after an escape char at end of parsing */
65 int thislevelstart; /* Char number of most recent start-of-expression at current level */
66 int prevlevelstart; /* Char number of start of containing expression */
195d1361 67 int location; /* Char number at which parsing stopped. */
e5d4f4dc 68 int mindepth; /* Minimum depth seen while scanning. */
195d1361 69 int comstr_start; /* Position just after last comment/string starter. */
e5d4f4dc
RS
70 };
71\f
37bef230
RS
72/* These variables are a cache for finding the start of a defun.
73 find_start_pos is the place for which the defun start was found.
74 find_start_value is the defun start position found for it.
75 find_start_buffer is the buffer it was found in.
76 find_start_begv is the BEGV value when it was found.
77 find_start_modiff is the value of MODIFF when it was found. */
78
79static int find_start_pos;
80static int find_start_value;
81static struct buffer *find_start_buffer;
82static int find_start_begv;
83static int find_start_modiff;
195d1361
RS
84\f
85
86struct gl_state_s gl_state; /* Global state of syntax parser. */
87
88INTERVAL interval_of ();
89#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
90 to scan to property-change. */
91
92/*
93 Update gl_state to an appropriate interval which contains POS. The
94 sign of COUNT give the relative position of POS wrt the previously
95 valid interval. If INIT, only [be]_property fields of gl_state are
96 valid at start, the rest is filled basing on OBJECT.
97
98 `gl_state.*_i' are the intervals, and pos is further in the search
99 direction than the intervals - or in an interval. We update the
100 current syntax-table basing on the property of this interval, and
101 update the interval to start further than POS - or be
102 NULL_INTERVAL. We also update lim_property to be the next value of
103 pos to call this subroutine again - or be before/after the
104 start/end of OBJECT. */
105
106void
107update_syntax_table (pos, count, init, object)
108 int pos, count, init;
109 Lisp_Object object;
110{
111 Lisp_Object tmp_table;
112 int cnt = 0, doing_extra = 0, invalidate = 1;
113 INTERVAL i, oldi;
114
115 if (init)
116 {
117 gl_state.start = gl_state.b_property;
118 gl_state.stop = gl_state.e_property;
119 gl_state.forward_i = interval_of (pos, object);
120 i = gl_state.backward_i = gl_state.forward_i;
121 gl_state.left_ok = gl_state.right_ok = 1;
122 invalidate = 0;
123 if (NULL_INTERVAL_P (i))
124 return;
125 gl_state.b_property = i->position - 1;
126 gl_state.e_property = INTERVAL_LAST_POS (i);
127 goto update;
128 }
129 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
130
131 /* We are guarantied to be called with pos either in i, of further off. */
132 if (NULL_INTERVAL_P (i))
133 error ("Error in syntax_table logic for to-the-end intervals");
134 else if (pos < i->position) /* Move left. */
135 {
136 if (count > 0)
137 error ("Error in syntax_table logic for intervals <-.");
138 /* Update the interval. */
139 i = update_interval (i, pos);
140 if (oldi->position != INTERVAL_LAST_POS (i))
141 {
142 invalidate = 0;
143 gl_state.right_ok = 1; /* Invalidate the other end. */
144 gl_state.forward_i = i;
145 gl_state.e_property = INTERVAL_LAST_POS (i);
146 }
147 }
148 else if (pos >= INTERVAL_LAST_POS (i)) /* Move right. */
149 {
150 if (count < 0)
151 error ("Error in syntax_table logic for intervals ->.");
152 /* Update the interval. */
153 i = update_interval (i, pos);
154 if (i->position != INTERVAL_LAST_POS (oldi))
155 {
156 invalidate = 0;
157 gl_state.left_ok = 1; /* Invalidate the other end. */
158 gl_state.backward_i = i;
159 gl_state.b_property = i->position - 1;
160 }
161 }
162 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
163 {
164 /* We do not need to recalculate tmp_table. */
165 tmp_table = gl_state.old_prop;
166 }
167
168 update:
169 tmp_table = textget (i->plist, Qsyntax_table);
170
171 if (invalidate)
172 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
173
174 if (invalidate) /* Did not get to adjacent interval. */
175 { /* with the same table => */
176 /* invalidate the old range. */
177 if (count > 0)
178 {
179 gl_state.backward_i = i;
180 gl_state.left_ok = 1; /* Invalidate the other end. */
181 gl_state.b_property = i->position - 1;
182 }
183 else
184 {
185 gl_state.forward_i = i;
186 gl_state.right_ok = 1; /* Invalidate the other end. */
187 gl_state.e_property = INTERVAL_LAST_POS (i);
188 }
189 }
37bef230 190
195d1361
RS
191 gl_state.current_syntax_table = tmp_table;
192 gl_state.old_prop = tmp_table;
2e34157c 193 if (EQ (Fsyntax_table_p (tmp_table), Qt))
195d1361
RS
194 {
195 gl_state.use_global = 0;
196 }
197 else if (CONSP (tmp_table))
198 {
199 gl_state.use_global = 1;
200 gl_state.global_code = tmp_table;
201 }
202 else
203 {
204 gl_state.use_global = 0;
205 gl_state.current_syntax_table = current_buffer->syntax_table;
206 }
207
208 while (!NULL_INTERVAL_P (i))
209 {
210 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
211 {
212 if (count > 0)
213 gl_state.right_ok = 0;
214 else
215 gl_state.left_ok = 0;
216 break;
217 }
218 else if (cnt == INTERVALS_AT_ONCE)
219 {
220 if (count > 0)
221 gl_state.right_ok = 1;
222 else
223 gl_state.left_ok = 1;
224 break;
225 }
226 cnt++;
227 i = count > 0 ? next_interval (i) : previous_interval (i);
228 }
229 if (NULL_INTERVAL_P (i))
230 { /* This property goes to the end. */
231 if (count > 0)
232 gl_state.e_property = gl_state.stop;
233 else
234 gl_state.b_property = gl_state.start;
235 }
236 else
237 {
238 if (count > 0)
239 {
240 gl_state.e_property = i->position;
241 gl_state.forward_i = i;
242 }
243 else
244 {
245 gl_state.b_property = i->position + LENGTH (i) - 1;
246 gl_state.backward_i = i;
247 }
248 }
249}
250\f
251/* Returns TRUE if char at POS is quoted.
252 Global syntax-table data should be set up already to be good at pos
253 or after. On return global syntax data is good for lookup at POS. */
254
255static int
256char_quoted (pos)
257 register int pos;
258{
259 register enum syntaxcode code;
260 register int beg = BEGV;
261 register int quoted = 0;
262 int temp_pos = pos;
263
264 DEC_POS (temp_pos);
265 while (temp_pos >= beg
266 && ( UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos), 1)
267 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
268 || code == Sescape))
269 {
270 temp_pos--, quoted = !quoted;
271 }
272 UPDATE_SYNTAX_TABLE (pos);
273 return quoted;
274}
275\f
37bef230
RS
276/* Find a defun-start that is the last one before POS (or nearly the last).
277 We record what we find, so that another call in the same area
195d1361
RS
278 can return the same value right away.
279
280 There is no promise at which position the global syntax data is
281 valid on return from the subroutine, so the caller should explicitly
282 update the global data. */
37bef230
RS
283
284static int
285find_defun_start (pos)
286 int pos;
287{
288 int tem;
289 int shortage;
290
291 /* Use previous finding, if it's valid and applies to this inquiry. */
292 if (current_buffer == find_start_buffer
293 /* Reuse the defun-start even if POS is a little farther on.
294 POS might be in the next defun, but that's ok.
295 Our value may not be the best possible, but will still be usable. */
296 && pos <= find_start_pos + 1000
297 && pos >= find_start_value
298 && BEGV == find_start_begv
299 && MODIFF == find_start_modiff)
300 return find_start_value;
301
302 /* Back up to start of line. */
be5357e2 303 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
37bef230 304
195d1361
RS
305 /* We optimize syntax-table lookup for rare updates. Thus we accept
306 only those `^\s(' which are good in global _and_ text-property
307 syntax-tables. */
308 gl_state.current_syntax_table = current_buffer->syntax_table;
309 gl_state.use_global = 0;
37bef230
RS
310 while (tem > BEGV)
311 {
312 /* Open-paren at start of line means we found our defun-start. */
313 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
195d1361
RS
314 {
315 SETUP_SYNTAX_TABLE (tem + 1, -1); /* Try again... */
316 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
317 break;
318 /* Now fallback to the default value. */
319 gl_state.current_syntax_table = current_buffer->syntax_table;
320 gl_state.use_global = 0;
321 }
37bef230 322 /* Move to beg of previous line. */
be5357e2 323 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
37bef230
RS
324 }
325
326 /* Record what we found, for the next try. */
327 find_start_value = tem;
328 find_start_buffer = current_buffer;
329 find_start_modiff = MODIFF;
330 find_start_begv = BEGV;
331 find_start_pos = pos;
332
333 return find_start_value;
334}
335\f
527a32d9
KH
336/* Checks whether FROM is at the end of a comment;
337 and if so, returns position of the start of the comment.
338 But does not move back before STOP.
339 Returns -1 if there is no comment ending at FROM.
340
341 Global syntax data remains valid for backward search starting at
342 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
343
344static int
ea315ed6
RS
345back_comment (from, stop, comstyle)
346 int from, stop, comstyle;
195d1361
RS
347{
348 /* Look back, counting the parity of string-quotes,
349 and recording the comment-starters seen.
350 When we reach a safe place, assume that's not in a string;
351 then step the main scan to the earliest comment-starter seen
352 an even number of string quotes away from the safe place.
353
354 OFROM[I] is position of the earliest comment-starter seen
355 which is I+2X quotes from the comment-end.
356 PARITY is current parity of quotes from the comment end. */
357 int parity = 0;
358 int my_stringend = 0;
359 int string_lossage = 0;
360 int comment_end = from;
361 int comstart_pos = 0;
362 int comstart_parity = 0;
363 int scanstart = from - 1;
364 register enum syntaxcode code;
ea315ed6 365 int c;
195d1361
RS
366
367 /* At beginning of range to scan, we're outside of strings;
368 that determines quote parity to the comment-end. */
369 while (from != stop)
370 {
371 /* Move back and examine a character. */
372 DEC_POS (from);
373 UPDATE_SYNTAX_TABLE_BACKWARD (from);
374
375 c = FETCH_CHAR (from);
376 code = SYNTAX (c);
377
527a32d9 378 /* If this char is the second of a 2-char comment end sequence,
195d1361
RS
379 back up and give the pair the appropriate syntax. */
380 if (from > stop && SYNTAX_COMEND_SECOND (c)
381 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from - 1)))
382 {
383 code = Sendcomment;
384 DEC_POS (from);
385 /* This is apparently the best we can do: */
386 UPDATE_SYNTAX_TABLE_BACKWARD (from);
387 c = FETCH_CHAR (from);
388 }
389
390 /* If this char starts a 2-char comment start sequence,
391 treat it like a 1-char comment starter. */
527a32d9
KH
392 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
393 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from + 1))
394 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (from + 1)))
395 code = Scomment;
195d1361
RS
396
397 /* Ignore escaped characters. */
398 if (char_quoted (from))
399 continue;
400
401 /* Track parity of quotes. */
402 if (code == Sstring)
403 {
404 parity ^= 1;
405 if (my_stringend == 0)
406 my_stringend = c;
407 /* If we have two kinds of string delimiters.
408 There's no way to grok this scanning backwards. */
409 else if (my_stringend != c)
410 string_lossage = 1;
411 }
412
413 if (code == Sstring_fence || code == Scomment_fence)
414 {
415 parity ^= 1;
416 if (my_stringend == 0)
417 my_stringend =
418 code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
419 /* If we have two kinds of string delimiters.
420 There's no way to grok this scanning backwards. */
421 else if (my_stringend != (code == Sstring_fence
422 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
423 string_lossage = 1;
424 }
425
426 /* Record comment-starters according to that
427 quote-parity to the comment-end. */
428 if (code == Scomment)
429 {
430 comstart_parity = parity;
431 comstart_pos = from;
432 }
433
434 /* If we find another earlier comment-ender,
435 any comment-starts earlier than that don't count
436 (because they go with the earlier comment-ender). */
437 if (code == Sendcomment
438 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
439 break;
440
441 /* Assume a defun-start point is outside of strings. */
442 if (code == Sopen
443 && (from == stop || FETCH_CHAR (from - 1) == '\n'))
444 break;
445 }
446
447 if (comstart_pos == 0)
448 {
449 from = comment_end;
450 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
451 }
452 /* If the earliest comment starter
453 is followed by uniform paired string quotes or none,
454 we know it can't be inside a string
455 since if it were then the comment ender would be inside one.
456 So it does start a comment. Skip back to it. */
457 else if (comstart_parity == 0 && !string_lossage)
458 {
459 from = comstart_pos;
460 /* Globals are correct now. */
461 }
462 else
463 {
464 /* We had two kinds of string delimiters mixed up
465 together. Decode this going forwards.
466 Scan fwd from the previous comment ender
467 to the one in question; this records where we
468 last passed a comment starter. */
469 struct lisp_parse_state state;
470 scan_sexps_forward (&state, find_defun_start (comment_end),
471 comment_end - 1, -10000, 0, Qnil, 0);
472 if (state.incomment)
473 {
474 /* scan_sexps_forward changed the direction of search in
475 global variables, so we need to update it completely. */
476
477 from = state.comstr_start;
478 }
479 else
480 {
481 from = comment_end;
482 }
483 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
484 }
485
486 return from;
487}
488\f
8489eb67 489DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
2203e1e8 490 "Return t if OBJECT is a syntax table.\n\
8ea151b2 491Currently, any char-table counts as a syntax table.")
2203e1e8
EN
492 (object)
493 Lisp_Object object;
8489eb67 494{
2203e1e8 495 if (CHAR_TABLE_P (object)
e704cb4b 496 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
497 return Qt;
498 return Qnil;
499}
500
8ea151b2 501static void
8489eb67
RS
502check_syntax_table (obj)
503 Lisp_Object obj;
504{
d1be9f0f 505 if (!(CHAR_TABLE_P (obj)
e704cb4b 506 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 507 wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
508}
509
8489eb67
RS
510DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
511 "Return the current syntax table.\n\
512This is the one specified by the current buffer.")
513 ()
514{
515 return current_buffer->syntax_table;
516}
517
518DEFUN ("standard-syntax-table", Fstandard_syntax_table,
519 Sstandard_syntax_table, 0, 0, 0,
520 "Return the standard syntax table.\n\
521This is the one used for new buffers.")
522 ()
523{
524 return Vstandard_syntax_table;
525}
526
527DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
528 "Construct a new syntax table and return it.\n\
529It is a copy of the TABLE, which defaults to the standard syntax table.")
530 (table)
531 Lisp_Object table;
532{
8ea151b2
RS
533 Lisp_Object copy;
534
265a9e55 535 if (!NILP (table))
8ea151b2
RS
536 check_syntax_table (table);
537 else
538 table = Vstandard_syntax_table;
539
540 copy = Fcopy_sequence (table);
0f867324
RS
541
542 /* Only the standard syntax table should have a default element.
543 Other syntax tables should inherit from parents instead. */
544 XCHAR_TABLE (copy)->defalt = Qnil;
545
546 /* Copied syntax tables should all have parents.
547 If we copied one with no parent, such as the standard syntax table,
548 use the standard syntax table as the copy's parent. */
549 if (NILP (XCHAR_TABLE (copy)->parent))
550 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 551 return copy;
8489eb67
RS
552}
553
554DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
555 "Select a new syntax table for the current buffer.\n\
556One argument, a syntax table.")
557 (table)
558 Lisp_Object table;
559{
8ea151b2 560 check_syntax_table (table);
8489eb67
RS
561 current_buffer->syntax_table = table;
562 /* Indicate that this buffer now has a specified syntax table. */
f110a664
RS
563 current_buffer->local_var_flags
564 |= XFASTINT (buffer_local_flags.syntax_table);
8489eb67
RS
565 return table;
566}
567\f
568/* Convert a letter which signifies a syntax code
569 into the code it signifies.
195d1361 570 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
571
572unsigned char syntax_spec_code[0400] =
573 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
574 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
575 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
576 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 577 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
578 (char) Smath, 0377, 0377, (char) Squote,
579 (char) Sopen, (char) Sclose, 0377, 0377,
580 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
581 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
582 0377, 0377, 0377, 0377,
583 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 584 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
585 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
586 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
587 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
588 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
589 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
590 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 591 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
592 };
593
195d1361 594/* Indexed by syntax code, give the letter that describes it. */
8489eb67 595
195d1361 596char syntax_code_spec[16] =
8489eb67 597 {
195d1361
RS
598 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
599 '!', '|'
8489eb67 600 };
93da5fff
KH
601
602/* Indexed by syntax code, give the object (cons of syntax code and
603 nil) to be stored in syntax table. Since these objects can be
604 shared among syntax tables, we generate them in advance. By
605 sharing objects, the function `describe-syntax' can give a more
606 compact listing. */
607static Lisp_Object Vsyntax_code_object;
608
8489eb67 609\f
8ea151b2
RS
610/* Look up the value for CHARACTER in syntax table TABLE's parent
611 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
612 for CHARACTER. It's actually used only when not compiled with GCC. */
613
614Lisp_Object
615syntax_parent_lookup (table, character)
616 Lisp_Object table;
617 int character;
618{
619 Lisp_Object value;
620
621 while (1)
622 {
623 table = XCHAR_TABLE (table)->parent;
624 if (NILP (table))
625 return Qnil;
626
627 value = XCHAR_TABLE (table)->contents[character];
628 if (!NILP (value))
629 return value;
630 }
631}
632
8489eb67 633DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
2203e1e8
EN
634 "Return the syntax code of CHARACTER, described by a character.\n\
635For example, if CHARACTER is a word constituent,\n\
636the character `w' is returned.\n\
8489eb67
RS
637The characters that correspond to various syntax codes\n\
638are listed in the documentation of `modify-syntax-entry'.")
2203e1e8
EN
639 (character)
640 Lisp_Object character;
8489eb67 641{
8ea151b2 642 int char_int;
195d1361
RS
643 gl_state.current_syntax_table = current_buffer->syntax_table;
644
645 gl_state.use_global = 0;
2203e1e8
EN
646 CHECK_NUMBER (character, 0);
647 char_int = XINT (character);
8ea151b2 648 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
649}
650
651DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
2203e1e8
EN
652 "Return the matching parenthesis of CHARACTER, or nil if none.")
653 (character)
654 Lisp_Object character;
beefa22e 655{
8ea151b2 656 int char_int, code;
195d1361
RS
657 gl_state.current_syntax_table = current_buffer->syntax_table;
658 gl_state.use_global = 0;
2203e1e8
EN
659 CHECK_NUMBER (character, 0);
660 char_int = XINT (character);
8ea151b2 661 code = SYNTAX (char_int);
a8bd7cd8 662 if (code == Sopen || code == Sclose)
2e34157c 663 return SYNTAX_MATCH (char_int);
beefa22e 664 return Qnil;
8489eb67
RS
665}
666
667/* This comment supplies the doc string for modify-syntax-entry,
668 for make-docfile to see. We cannot put this in the real DEFUN
669 due to limits in the Unix cpp.
670
2fcea188 671DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
672 "Set syntax for character CHAR according to string S.\n\
673The syntax is changed only for table TABLE, which defaults to\n\
674 the current buffer's syntax table.\n\
675The first character of S should be one of the following:\n\
32676c08
JB
676 Space or - whitespace syntax. w word constituent.\n\
677 _ symbol constituent. . punctuation.\n\
678 ( open-parenthesis. ) close-parenthesis.\n\
679 \" string quote. \\ escape.\n\
680 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 681 < comment starter. > comment ender.\n\
6cb71bf6 682 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 683\n\
8489eb67
RS
684Only single-character comment start and end sequences are represented thus.\n\
685Two-character sequences are represented as described below.\n\
686The second character of S is the matching parenthesis,\n\
687 used only if the first character is `(' or `)'.\n\
688Any additional characters are flags.\n\
e5d4f4dc 689Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
2203e1e8
EN
690 1 means CHAR is the start of a two-char comment start sequence.\n\
691 2 means CHAR is the second character of such a sequence.\n\
692 3 means CHAR is the start of a two-char comment end sequence.\n\
693 4 means CHAR is the second character of such a sequence.\n\
e5d4f4dc 694\n\
195d1361 695There can be up to two orthogonal comment sequences. This is to support\n\
e5d4f4dc 696language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
697a, but you can set the comment sequence style to b (on the second character\n\
698of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc 699this flag:\n\
2203e1e8 700 b means CHAR is part of comment sequence b.\n\
e5d4f4dc 701\n\
2203e1e8 702 p means CHAR is a prefix character for `backward-prefix-chars';\n\
e5d4f4dc 703 such characters are treated as whitespace when they occur\n\
8489eb67 704 between expressions.")
2fcea188 705 (char, s, table)
8489eb67
RS
706*/
707
708DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
709 /* I really don't know why this is interactive
710 help-form should at least be made useful whilst reading the second arg
711 */
712 "cSet syntax for character: \nsSet syntax for %s to: ",
713 0 /* See immediately above */)
714 (c, newentry, syntax_table)
715 Lisp_Object c, newentry, syntax_table;
716{
8ea151b2 717 register unsigned char *p;
8489eb67 718 register enum syntaxcode code;
247e20a8 719 int val;
8ea151b2 720 Lisp_Object match;
8489eb67
RS
721
722 CHECK_NUMBER (c, 0);
723 CHECK_STRING (newentry, 1);
8ea151b2 724
265a9e55 725 if (NILP (syntax_table))
8489eb67
RS
726 syntax_table = current_buffer->syntax_table;
727 else
8ea151b2 728 check_syntax_table (syntax_table);
8489eb67
RS
729
730 p = XSTRING (newentry)->data;
731 code = (enum syntaxcode) syntax_spec_code[*p++];
732 if (((int) code & 0377) == 0377)
2e34157c 733 error ("invalid syntax description letter: %c", p[-1]);
8489eb67 734
8ea151b2
RS
735 if (code == Sinherit)
736 {
e704cb4b 737 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
8ea151b2
RS
738 return Qnil;
739 }
740
741 if (*p)
d1be9f0f 742 {
93da5fff
KH
743 int len;
744 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
745 len);
746 XSETINT (match, character);
d1be9f0f
RS
747 if (XFASTINT (match) == ' ')
748 match = Qnil;
93da5fff 749 p += len;
d1be9f0f
RS
750 }
751 else
8ea151b2 752 match = Qnil;
8489eb67 753
8ea151b2 754 val = (int) code;
8489eb67
RS
755 while (*p)
756 switch (*p++)
757 {
758 case '1':
247e20a8 759 val |= 1 << 16;
8489eb67
RS
760 break;
761
762 case '2':
247e20a8 763 val |= 1 << 17;
8489eb67
RS
764 break;
765
766 case '3':
247e20a8 767 val |= 1 << 18;
8489eb67
RS
768 break;
769
770 case '4':
247e20a8 771 val |= 1 << 19;
8489eb67
RS
772 break;
773
774 case 'p':
247e20a8 775 val |= 1 << 20;
8489eb67 776 break;
e5d4f4dc
RS
777
778 case 'b':
247e20a8 779 val |= 1 << 21;
e5d4f4dc 780 break;
8489eb67
RS
781 }
782
93da5fff
KH
783 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
784 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
785 else
786 /* Since we can't use a shared object, let's make a new one. */
787 newentry = Fcons (make_number (val), match);
788
e704cb4b 789 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
8489eb67
RS
790
791 return Qnil;
792}
793\f
794/* Dump syntax table to buffer in human-readable format */
795
340f92b5 796static void
8489eb67
RS
797describe_syntax (value)
798 Lisp_Object value;
799{
800 register enum syntaxcode code;
e5d4f4dc 801 char desc, match, start1, start2, end1, end2, prefix, comstyle;
8489eb67 802 char str[2];
8ea151b2 803 Lisp_Object first, match_lisp;
8489eb67
RS
804
805 Findent_to (make_number (16), make_number (1));
806
8ea151b2
RS
807 if (NILP (value))
808 {
93da5fff 809 insert_string ("default\n");
8ea151b2
RS
810 return;
811 }
812
908b7fea
KH
813 if (CHAR_TABLE_P (value))
814 {
815 insert_string ("deeper char-table ...\n");
816 return;
817 }
818
8ea151b2
RS
819 if (!CONSP (value))
820 {
93da5fff 821 insert_string ("invalid\n");
8ea151b2
RS
822 return;
823 }
824
825 first = XCONS (value)->car;
826 match_lisp = XCONS (value)->cdr;
827
828 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 829 {
93da5fff 830 insert_string ("invalid\n");
8489eb67
RS
831 return;
832 }
833
e704cb4b 834 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
835 start1 = (XINT (first) >> 16) & 1;
836 start2 = (XINT (first) >> 17) & 1;
837 end1 = (XINT (first) >> 18) & 1;
838 end2 = (XINT (first) >> 19) & 1;
839 prefix = (XINT (first) >> 20) & 1;
840 comstyle = (XINT (first) >> 21) & 1;
8489eb67
RS
841
842 if ((int) code < 0 || (int) code >= (int) Smax)
843 {
844 insert_string ("invalid");
845 return;
846 }
847 desc = syntax_code_spec[(int) code];
848
849 str[0] = desc, str[1] = 0;
850 insert (str, 1);
851
93da5fff
KH
852 if (NILP (match_lisp))
853 insert (" ", 1);
854 else
855 insert_char (XINT (match_lisp));
8489eb67 856
8489eb67
RS
857 if (start1)
858 insert ("1", 1);
859 if (start2)
860 insert ("2", 1);
861
862 if (end1)
863 insert ("3", 1);
864 if (end2)
865 insert ("4", 1);
866
867 if (prefix)
868 insert ("p", 1);
e5d4f4dc
RS
869 if (comstyle)
870 insert ("b", 1);
8489eb67
RS
871
872 insert_string ("\twhich means: ");
873
0220c518 874 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
875 {
876 case Swhitespace:
877 insert_string ("whitespace"); break;
878 case Spunct:
879 insert_string ("punctuation"); break;
880 case Sword:
881 insert_string ("word"); break;
882 case Ssymbol:
883 insert_string ("symbol"); break;
884 case Sopen:
885 insert_string ("open"); break;
886 case Sclose:
887 insert_string ("close"); break;
888 case Squote:
889 insert_string ("quote"); break;
890 case Sstring:
891 insert_string ("string"); break;
892 case Smath:
893 insert_string ("math"); break;
894 case Sescape:
895 insert_string ("escape"); break;
896 case Scharquote:
897 insert_string ("charquote"); break;
898 case Scomment:
899 insert_string ("comment"); break;
900 case Sendcomment:
901 insert_string ("endcomment"); break;
902 default:
903 insert_string ("invalid");
904 return;
905 }
906
8ea151b2 907 if (!NILP (match_lisp))
8489eb67
RS
908 {
909 insert_string (", matches ");
8ea151b2 910 insert_char (XINT (match_lisp));
8489eb67
RS
911 }
912
913 if (start1)
914 insert_string (",\n\t is the first character of a comment-start sequence");
915 if (start2)
916 insert_string (",\n\t is the second character of a comment-start sequence");
917
918 if (end1)
919 insert_string (",\n\t is the first character of a comment-end sequence");
920 if (end2)
921 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
922 if (comstyle)
923 insert_string (" (comment style b)");
924
8489eb67
RS
925 if (prefix)
926 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
927
928 insert_string ("\n");
929}
930
340f92b5 931static Lisp_Object
8489eb67
RS
932describe_syntax_1 (vector)
933 Lisp_Object vector;
934{
935 struct buffer *old = current_buffer;
936 set_buffer_internal (XBUFFER (Vstandard_output));
e41819d4 937 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
93da5fff
KH
938 while (! NILP (XCHAR_TABLE (vector)->parent))
939 {
940 vector = XCHAR_TABLE (vector)->parent;
941 insert_string ("\nThe parent syntax table is:");
e41819d4
RS
942 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
943 (int *) 0, 0);
93da5fff
KH
944 }
945
36cd82fe 946 call0 (intern ("help-mode"));
8489eb67
RS
947 set_buffer_internal (old);
948 return Qnil;
949}
950
951DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
952 "Describe the syntax specifications in the syntax table.\n\
953The descriptions are inserted in a buffer, which is then displayed.")
954 ()
955{
956 internal_with_output_to_temp_buffer
957 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
958
959 return Qnil;
960}
961\f
195d1361
RS
962int parse_sexp_ignore_comments;
963
8489eb67
RS
964/* Return the position across COUNT words from FROM.
965 If that many words cannot be found before the end of the buffer, return 0.
966 COUNT negative means scan backward and stop at word beginning. */
967
968scan_words (from, count)
969 register int from, count;
970{
971 register int beg = BEGV;
972 register int end = ZV;
93da5fff
KH
973 register enum syntaxcode code;
974 int ch0, ch1;
975 int temp_pos;
8489eb67
RS
976
977 immediate_quit = 1;
978 QUIT;
979
195d1361
RS
980 SETUP_SYNTAX_TABLE (from, count);
981
8489eb67
RS
982 while (count > 0)
983 {
984 while (1)
985 {
986 if (from == end)
987 {
988 immediate_quit = 0;
989 return 0;
990 }
195d1361 991 UPDATE_SYNTAX_TABLE_FORWARD (from);
93da5fff
KH
992 ch0 = FETCH_CHAR (from);
993 code = SYNTAX (ch0);
994 INC_POS (from);
8489eb67
RS
995 if (words_include_escapes
996 && (code == Sescape || code == Scharquote))
997 break;
998 if (code == Sword)
999 break;
8489eb67 1000 }
93da5fff
KH
1001 /* Now CH0 is a character which begins a word and FROM is the
1002 position of the next character. */
8489eb67
RS
1003 while (1)
1004 {
1005 if (from == end) break;
195d1361 1006 UPDATE_SYNTAX_TABLE_FORWARD (from);
93da5fff
KH
1007 ch1 = FETCH_CHAR (from);
1008 code = SYNTAX (ch1);
8489eb67
RS
1009 if (!(words_include_escapes
1010 && (code == Sescape || code == Scharquote)))
93da5fff 1011 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1012 break;
93da5fff
KH
1013 INC_POS (from);
1014 ch0 = ch1;
8489eb67
RS
1015 }
1016 count--;
1017 }
1018 while (count < 0)
1019 {
1020 while (1)
1021 {
1022 if (from == beg)
1023 {
1024 immediate_quit = 0;
1025 return 0;
1026 }
93da5fff 1027 DEC_POS (from);
195d1361 1028 UPDATE_SYNTAX_TABLE_BACKWARD (from);
93da5fff
KH
1029 ch1 = FETCH_CHAR (from);
1030 code = SYNTAX (ch1);
8489eb67
RS
1031 if (words_include_escapes
1032 && (code == Sescape || code == Scharquote))
1033 break;
1034 if (code == Sword)
1035 break;
8489eb67 1036 }
93da5fff
KH
1037 /* Now CH1 is a character which ends a word and FROM is the
1038 position of it. */
8489eb67
RS
1039 while (1)
1040 {
1041 if (from == beg) break;
93da5fff
KH
1042 temp_pos = from;
1043 DEC_POS (temp_pos);
195d1361 1044 UPDATE_SYNTAX_TABLE_BACKWARD (from);
93da5fff
KH
1045 ch0 = FETCH_CHAR (temp_pos);
1046 code = SYNTAX (ch0);
8489eb67
RS
1047 if (!(words_include_escapes
1048 && (code == Sescape || code == Scharquote)))
93da5fff 1049 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1050 break;
93da5fff
KH
1051 from = temp_pos;
1052 ch1 = ch0;
8489eb67
RS
1053 }
1054 count++;
1055 }
1056
1057 immediate_quit = 0;
1058
1059 return from;
1060}
1061
1062DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1063 "Move point forward ARG words (backward if ARG is negative).\n\
1064Normally returns t.\n\
1065If an edge of the buffer is reached, point is left there\n\
1066and nil is returned.")
1067 (count)
1068 Lisp_Object count;
1069{
1070 int val;
1071 CHECK_NUMBER (count, 0);
1072
6ec8bbd2 1073 if (!(val = scan_words (PT, XINT (count))))
8489eb67
RS
1074 {
1075 SET_PT (XINT (count) > 0 ? ZV : BEGV);
1076 return Qnil;
1077 }
1078 SET_PT (val);
1079 return Qt;
1080}
1081\f
195d1361
RS
1082Lisp_Object skip_chars ();
1083
1084DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1085 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1086STRING is like the inside of a `[...]' in a regular expression\n\
1087except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
1088Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1089With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1090Returns the distance traveled, either zero or positive.")
1091 (string, lim)
1092 Lisp_Object string, lim;
1093{
1094 return skip_chars (1, 0, string, lim);
1095}
1096
1097DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1098 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1099See `skip-chars-forward' for details.\n\
1100Returns the distance traveled, either zero or negative.")
1101 (string, lim)
1102 Lisp_Object string, lim;
1103{
1104 return skip_chars (0, 0, string, lim);
1105}
1106
1107DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1108 "Move point forward across chars in specified syntax classes.\n\
1109SYNTAX is a string of syntax code characters.\n\
1110Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1111If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1112This function returns the distance traveled, either zero or positive.")
1113 (syntax, lim)
1114 Lisp_Object syntax, lim;
1115{
1116 return skip_chars (1, 1, syntax, lim);
1117}
1118
1119DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1120 "Move point backward across chars in specified syntax classes.\n\
1121SYNTAX is a string of syntax code characters.\n\
1122Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1123If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1124This function returns the distance traveled, either zero or negative.")
1125 (syntax, lim)
1126 Lisp_Object syntax, lim;
1127{
1128 return skip_chars (0, 1, syntax, lim);
1129}
1130
1131Lisp_Object
1132skip_chars (forwardp, syntaxp, string, lim)
1133 int forwardp, syntaxp;
1134 Lisp_Object string, lim;
1135{
1136 register unsigned char *p, *pend;
1137 register unsigned int c;
7e68b0ea 1138 register int ch;
195d1361 1139 unsigned char fastmap[0400];
7e68b0ea
RS
1140 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1141 of which codes don't fit in FASTMAP. In that case, we set the
1142 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1143 and set the actual ranges of characters in CHAR_RANGES. In the
1144 form "X-Y" of STRING, both X and Y must belong to the same
1145 character set because a range striding across character sets is
1146 meaningless. */
5df4982e 1147 int *char_ranges;
7e68b0ea 1148 int n_char_ranges = 0;
195d1361
RS
1149 int negate = 0;
1150 register int i;
7e68b0ea 1151 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
195d1361
RS
1152
1153 CHECK_STRING (string, 0);
5df4982e 1154 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
195d1361
RS
1155
1156 if (NILP (lim))
1157 XSETINT (lim, forwardp ? ZV : BEGV);
1158 else
1159 CHECK_NUMBER_COERCE_MARKER (lim, 1);
1160
1161 /* In any case, don't allow scan outside bounds of buffer. */
1162 /* jla turned this off, for no known reason.
1163 bfox turned the ZV part on, and rms turned the
1164 BEGV part back on. */
1165 if (XINT (lim) > ZV)
1166 XSETFASTINT (lim, ZV);
1167 if (XINT (lim) < BEGV)
1168 XSETFASTINT (lim, BEGV);
1169
1170 p = XSTRING (string)->data;
1171 pend = p + XSTRING (string)->size;
1172 bzero (fastmap, sizeof fastmap);
1173
1174 if (p != pend && *p == '^')
1175 {
1176 negate = 1; p++;
1177 }
1178
1179 /* Find the characters specified and set their elements of fastmap.
1180 If syntaxp, each character counts as itself.
1181 Otherwise, handle backslashes and ranges specially. */
1182
1183 while (p != pend)
1184 {
7e68b0ea
RS
1185 c = *p;
1186 if (multibyte)
1187 {
1188 ch = STRING_CHAR (p, pend - p);
1189 p += BYTES_BY_CHAR_HEAD (*p);
1190 }
1191 else
1192 {
1193 ch = c;
1194 p++;
1195 }
195d1361
RS
1196 if (syntaxp)
1197 fastmap[syntax_spec_code[c]] = 1;
1198 else
1199 {
1200 if (c == '\\')
1201 {
1202 if (p == pend) break;
1203 c = *p++;
1204 }
1205 if (p != pend && *p == '-')
1206 {
7e68b0ea
RS
1207 unsigned int ch2;
1208
195d1361
RS
1209 p++;
1210 if (p == pend) break;
7e68b0ea
RS
1211 if (SINGLE_BYTE_CHAR_P (ch))
1212 while (c <= *p)
1213 {
1214 fastmap[c] = 1;
1215 c++;
1216 }
1217 else
195d1361 1218 {
7e68b0ea
RS
1219 fastmap[c] = 1; /* C is the base leading-code. */
1220 ch2 = STRING_CHAR (p, pend - p);
1221 if (ch <= ch2)
1222 char_ranges[n_char_ranges++] = ch,
1223 char_ranges[n_char_ranges++] = ch2;
195d1361 1224 }
7e68b0ea 1225 p += multibyte ? BYTES_BY_CHAR_HEAD (*p) : 1;
195d1361
RS
1226 }
1227 else
7e68b0ea
RS
1228 {
1229 fastmap[c] = 1;
1230 if (!SINGLE_BYTE_CHAR_P (ch))
1231 {
1232 char_ranges[n_char_ranges++] = ch;
1233 char_ranges[n_char_ranges++] = ch;
1234 }
1235 }
195d1361
RS
1236 }
1237 }
1238
7e68b0ea
RS
1239 /* If ^ was the first character, complement the fastmap. In
1240 addition, as all multibyte characters have possibility of
1241 matching, set all entries for base leading codes, which is
1242 harmless even if SYNTAXP is 1. */
195d1361
RS
1243
1244 if (negate)
1245 for (i = 0; i < sizeof fastmap; i++)
7e68b0ea
RS
1246 {
1247 if (!multibyte || !BASE_LEADING_CODE_P (i))
1248 fastmap[i] ^= 1;
1249 else
1250 fastmap[i] = 1;
1251 }
195d1361
RS
1252
1253 {
1254 int start_point = PT;
1255 int pos = PT;
1256
1257 immediate_quit = 1;
1258 if (syntaxp)
1259 {
1260 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1261 if (forwardp)
1262 {
7e68b0ea 1263 if (multibyte)
195d1361 1264 {
7e68b0ea
RS
1265 while (pos < XINT (lim)
1266 && fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1267 {
1268 INC_POS (pos);
1269 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1270 }
1271 }
1272 else
1273 {
1274 while (pos < XINT (lim)
1275 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1276 {
1277 pos++;
1278 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1279 }
195d1361
RS
1280 }
1281 }
1282 else
1283 {
7e68b0ea 1284 if (multibyte)
195d1361 1285 {
7e68b0ea
RS
1286 while (pos > XINT (lim))
1287 {
1288 int savepos = pos;
1289 DEC_POS (pos);
4c8535eb 1290 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
7e68b0ea
RS
1291 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos))])
1292 {
1293 pos = savepos;
1294 break;
1295 }
7e68b0ea
RS
1296 }
1297 }
1298 else
1299 {
4c8535eb 1300 while (pos > XINT (lim))
7e68b0ea
RS
1301 {
1302 pos--;
4c8535eb
RS
1303 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1304 if (!fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1305 {
1306 pos++;
1307 break;
1308 }
7e68b0ea 1309 }
195d1361
RS
1310 }
1311 }
1312 }
1313 else
1314 {
1315 if (forwardp)
1316 {
7e68b0ea
RS
1317 if (multibyte)
1318 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos))])
1319 {
1320 if (!BASE_LEADING_CODE_P (c))
1321 pos++;
1322 else if (n_char_ranges)
1323 {
1324 /* We much check CHAR_RANGES for a multibyte
1325 character. */
1326 ch = FETCH_MULTIBYTE_CHAR (pos);
1327 for (i = 0; i < n_char_ranges; i += 2)
1328 if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
1329 break;
1330 if (!(negate ^ (i < n_char_ranges)))
1331 break;
1332
1333 INC_POS (pos);
1334 }
1335 else
1336 {
1337 if (!negate) break;
1338 INC_POS (pos);
1339 }
1340 }
1341 else
1342 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1343 pos++;
195d1361
RS
1344 }
1345 else
1346 {
7e68b0ea
RS
1347 if (multibyte)
1348 while (pos > XINT (lim))
1349 {
1350 int savepos = pos;
1351 DEC_POS (pos);
1352 if (fastmap[(c = FETCH_BYTE (pos))])
1353 {
1354 if (!BASE_LEADING_CODE_P (c))
1355 ;
1356 else if (n_char_ranges)
1357 {
1358 /* We much check CHAR_RANGES for a multibyte
1359 character. */
1360 ch = FETCH_MULTIBYTE_CHAR (pos);
1361 for (i = 0; i < n_char_ranges; i += 2)
1362 if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
1363 break;
1364 if (!(negate ^ (i < n_char_ranges)))
1365 {
1366 pos = savepos;
1367 break;
1368 }
1369 }
1370 else
1371 if (!negate)
1372 {
1373 pos = savepos;
1374 break;
1375 }
1376 }
1377 else
1378 {
1379 pos = savepos;
1380 break;
1381 }
1382 }
1383 else
1384 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1385 pos--;
195d1361
RS
1386 }
1387 }
7e68b0ea
RS
1388
1389 if (multibyte
1390 /* INC_POS or DEC_POS might have moved POS over LIM. */
1391 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1392 pos = XINT (lim);
1393
195d1361
RS
1394 SET_PT (pos);
1395 immediate_quit = 0;
1396
1397 return make_number (PT - start_point);
1398 }
1399}
1400\f
b3cfe0c8
RS
1401DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1402 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 1403Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 1404Set point to where scanning stops.\n\
b3cfe0c8
RS
1405If N comments are found as expected, with nothing except whitespace\n\
1406between them, return t; otherwise return nil.")
1407 (count)
840f481c 1408 Lisp_Object count;
b3cfe0c8
RS
1409{
1410 register int from;
1411 register int stop;
8ea151b2 1412 register int c, c1;
b3cfe0c8
RS
1413 register enum syntaxcode code;
1414 int comstyle = 0; /* style of comment encountered */
be720845 1415 int found;
840f481c 1416 int count1;
93da5fff 1417 int temp_pos;
840f481c
RS
1418
1419 CHECK_NUMBER (count, 0);
1420 count1 = XINT (count);
195d1361 1421 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
1422
1423 immediate_quit = 1;
1424 QUIT;
1425
1426 from = PT;
1427
195d1361 1428 SETUP_SYNTAX_TABLE (from, count1);
840f481c 1429 while (count1 > 0)
b3cfe0c8 1430 {
04882296 1431 do
b3cfe0c8 1432 {
04882296
KH
1433 if (from == stop)
1434 {
1435 SET_PT (from);
b7e6e612 1436 immediate_quit = 0;
04882296
KH
1437 return Qnil;
1438 }
195d1361 1439 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8
RS
1440 c = FETCH_CHAR (from);
1441 code = SYNTAX (c);
93da5fff 1442 INC_POS (from);
b3cfe0c8
RS
1443 comstyle = 0;
1444 if (from < stop && SYNTAX_COMSTART_FIRST (c)
8ea151b2
RS
1445 && (c1 = FETCH_CHAR (from),
1446 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 1447 {
7fc8191e
RS
1448 /* We have encountered a comment start sequence and we
1449 are ignoring all text inside comments. We must record
b3cfe0c8
RS
1450 the comment style this sequence begins so that later,
1451 only a comment end of the same style actually ends
7fc8191e 1452 the comment section. */
b3cfe0c8 1453 code = Scomment;
8ea151b2 1454 comstyle = SYNTAX_COMMENT_STYLE (c1);
93da5fff 1455 INC_POS (from);
b3cfe0c8 1456 }
04882296
KH
1457 }
1458 while (code == Swhitespace || code == Sendcomment);
195d1361 1459 if (code != Scomment && code != Scomment_fence)
04882296
KH
1460 {
1461 immediate_quit = 0;
93da5fff
KH
1462 DEC_POS (from);
1463 SET_PT (from);
04882296
KH
1464 return Qnil;
1465 }
1466 /* We're at the start of a comment. */
1467 while (1)
1468 {
1469 if (from == stop)
b3cfe0c8
RS
1470 {
1471 immediate_quit = 0;
04882296 1472 SET_PT (from);
b3cfe0c8
RS
1473 return Qnil;
1474 }
195d1361 1475 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 1476 c = FETCH_CHAR (from);
93da5fff 1477 INC_POS (from);
04882296
KH
1478 if (SYNTAX (c) == Sendcomment
1479 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1480 /* we have encountered a comment end of the same style
1481 as the comment sequence which began this comment
1482 section */
1483 break;
195d1361
RS
1484 if (SYNTAX (c) == Scomment_fence
1485 && comstyle == ST_COMMENT_STYLE)
1486 /* we have encountered a comment end of the same style
1487 as the comment sequence which began this comment
1488 section. */
1489 break;
04882296 1490 if (from < stop && SYNTAX_COMEND_FIRST (c)
8ea151b2
RS
1491 && (c1 = FETCH_CHAR (from),
1492 SYNTAX_COMEND_SECOND (c1))
04882296
KH
1493 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1494 /* we have encountered a comment end of the same style
1495 as the comment sequence which began this comment
1496 section */
93da5fff 1497 { INC_POS (from); break; }
b3cfe0c8 1498 }
04882296 1499 /* We have skipped one comment. */
840f481c 1500 count1--;
b3cfe0c8
RS
1501 }
1502
840f481c 1503 while (count1 < 0)
b3cfe0c8 1504 {
b9145dbb 1505 while (1)
b3cfe0c8
RS
1506 {
1507 int quoted;
b9145dbb
RS
1508 if (from <= stop)
1509 {
1510 SET_PT (stop);
1511 immediate_quit = 0;
1512 return Qnil;
1513 }
b3cfe0c8 1514
93da5fff 1515 DEC_POS (from);
b3cfe0c8
RS
1516 quoted = char_quoted (from);
1517 if (quoted)
195d1361
RS
1518 {
1519 DEC_POS (from);
1520 goto leave; /* ????? XXXXX */
1521 }
1522 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b3cfe0c8
RS
1523 c = FETCH_CHAR (from);
1524 code = SYNTAX (c);
1525 comstyle = 0;
7fc8191e
RS
1526 if (code == Sendcomment)
1527 comstyle = SYNTAX_COMMENT_STYLE (c);
93da5fff
KH
1528 temp_pos = from;
1529 DEC_POS (temp_pos);
b3cfe0c8 1530 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff 1531 && (c1 = FETCH_CHAR (temp_pos),
8ea151b2 1532 SYNTAX_COMEND_FIRST (c1))
93da5fff 1533 && !char_quoted (temp_pos))
b3cfe0c8 1534 {
7fc8191e 1535 /* We must record the comment style encountered so that
b3cfe0c8 1536 later, we can match only the proper comment begin
7fc8191e 1537 sequence of the same style. */
b3cfe0c8 1538 code = Sendcomment;
8ea151b2 1539 comstyle = SYNTAX_COMMENT_STYLE (c1);
93da5fff 1540 from = temp_pos;
b3cfe0c8 1541 }
ae03af34
RS
1542 if (from > stop && SYNTAX_COMSTART_SECOND (c)
1543 && (c1 = FETCH_CHAR (temp_pos),
1544 SYNTAX_COMSTART_FIRST (c1))
1545 && !char_quoted (temp_pos))
1546 {
1547 /* We must record the comment style encountered so that
1548 later, we can match only the proper comment begin
1549 sequence of the same style. */
1550 code = Scomment;
1551 from = temp_pos;
1552 }
b3cfe0c8 1553
195d1361
RS
1554 if (code == Scomment_fence)
1555 {
1556 /* Skip until first preceding unquoted comment_fence. */
1557 int found = 0, ini = from;
1558
1559 while (--from != stop)
1560 {
1561 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1562 c = FETCH_CHAR (from);
1563 if (SYNTAX (c) == Scomment_fence && !char_quoted (from))
1564 {
1565 found = 1;
1566 break;
1567 }
1568 }
1569 if (found == 0)
1570 {
1571 from = ini; /* Set point to ini + 1. */
1572 goto leave;
1573 }
1574 }
1575 else if (code == Sendcomment)
b3cfe0c8 1576 {
8f9dc2ed 1577#if 0
b3cfe0c8
RS
1578 if (code != SYNTAX (c))
1579 /* For a two-char comment ender, we can assume
1580 it does end a comment. So scan back in a simple way. */
1581 {
93da5fff 1582 if (from != stop) DEC_POS (from);
b3cfe0c8
RS
1583 while (1)
1584 {
8ea151b2
RS
1585 if ((c = FETCH_CHAR (from),
1586 SYNTAX (c) == Scomment)
b3cfe0c8
RS
1587 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1588 break;
1589 if (from == stop)
1590 {
1591 immediate_quit = 0;
c81a3712 1592 SET_PT (from);
b3cfe0c8
RS
1593 return Qnil;
1594 }
93da5fff 1595 DEC_POS (from);
b3cfe0c8 1596 if (SYNTAX_COMSTART_SECOND (c)
8ea151b2
RS
1597 && (c1 = FETCH_CHAR (from),
1598 SYNTAX_COMSTART_FIRST (c1))
b3cfe0c8
RS
1599 && SYNTAX_COMMENT_STYLE (c) == comstyle
1600 && !char_quoted (from))
1601 break;
1602 }
1603 break;
1604 }
8f9dc2ed 1605#endif /* 0 */
ea315ed6 1606 found = back_comment (from, stop, comstyle);
195d1361
RS
1607 if (found != -1) from = found;
1608#if 0
b3cfe0c8
RS
1609 /* Look back, counting the parity of string-quotes,
1610 and recording the comment-starters seen.
1611 When we reach a safe place, assume that's not in a string;
1612 then step the main scan to the earliest comment-starter seen
1613 an even number of string quotes away from the safe place.
1614
1615 OFROM[I] is position of the earliest comment-starter seen
1616 which is I+2X quotes from the comment-end.
1617 PARITY is current parity of quotes from the comment end. */
1618 {
1619 int parity = 0;
1620 char my_stringend = 0;
1621 int string_lossage = 0;
1622 int comment_end = from;
1623 int comstart_pos = 0;
1624 int comstart_parity = 0;
93da5fff 1625 int scanstart = from;
b3cfe0c8 1626
93da5fff 1627 DEC_POS (scanstart);
b3cfe0c8
RS
1628 /* At beginning of range to scan, we're outside of strings;
1629 that determines quote parity to the comment-end. */
1630 while (from != stop)
1631 {
1632 /* Move back and examine a character. */
93da5fff 1633 DEC_POS (from);
b3cfe0c8 1634
195d1361 1635 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b3cfe0c8
RS
1636 c = FETCH_CHAR (from);
1637 code = SYNTAX (c);
1638
1639 /* If this char is the second of a 2-char comment sequence,
1640 back up and give the pair the appropriate syntax. */
93da5fff
KH
1641 temp_pos = from;
1642 DEC_POS (temp_pos);
b3cfe0c8 1643 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff
KH
1644 && (c1 = FETCH_CHAR (temp_pos),
1645 SYNTAX_COMEND_FIRST (c1)))
b3cfe0c8
RS
1646 {
1647 code = Sendcomment;
93da5fff
KH
1648 from = temp_pos;
1649 c = c1;
b3cfe0c8
RS
1650 }
1651
93da5fff
KH
1652 temp_pos = from;
1653 INC_POS (temp_pos);
4841063f
RS
1654 /* If this char starts a 2-char comment start sequence,
1655 treat it like a 1-char comment starter. */
1656 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
93da5fff
KH
1657 && (c1 = FETCH_CHAR (temp_pos),
1658 SYNTAX_COMSTART_SECOND (c1))
1659 && comstyle == SYNTAX_COMMENT_STYLE (c1))
4841063f 1660 code = Scomment;
b3cfe0c8
RS
1661
1662 /* Ignore escaped characters. */
1663 if (char_quoted (from))
1664 continue;
1665
1666 /* Track parity of quotes. */
1667 if (code == Sstring)
1668 {
1669 parity ^= 1;
1670 if (my_stringend == 0)
1671 my_stringend = c;
1672 /* If we have two kinds of string delimiters.
1673 There's no way to grok this scanning backwards. */
1674 else if (my_stringend != c)
1675 string_lossage = 1;
1676 }
1677
1678 /* Record comment-starters according to that
1679 quote-parity to the comment-end. */
1680 if (code == Scomment)
1681 {
1682 comstart_parity = parity;
1683 comstart_pos = from;
1684 }
1685
1686 /* If we find another earlier comment-ender,
eb8c3be9 1687 any comment-starts earlier than that don't count
b3cfe0c8
RS
1688 (because they go with the earlier comment-ender). */
1689 if (code == Sendcomment
1690 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1691 break;
1692
1693 /* Assume a defun-start point is outside of strings. */
1694 if (code == Sopen
93da5fff 1695 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
b3cfe0c8
RS
1696 break;
1697 }
1698
1699 if (comstart_pos == 0)
1700 from = comment_end;
1701 /* If the earliest comment starter
1702 is followed by uniform paired string quotes or none,
1703 we know it can't be inside a string
1704 since if it were then the comment ender would be inside one.
1705 So it does start a comment. Skip back to it. */
1706 else if (comstart_parity == 0 && !string_lossage)
1707 from = comstart_pos;
1708 else
1709 {
1710 /* We had two kinds of string delimiters mixed up
1711 together. Decode this going forwards.
1712 Scan fwd from the previous comment ender
1713 to the one in question; this records where we
1714 last passed a comment starter. */
1715 struct lisp_parse_state state;
1716 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 1717 comment_end - 1, -10000, 0, Qnil, 0);
b3cfe0c8 1718 if (state.incomment)
195d1361 1719 from = state.comstr_start;
b3cfe0c8
RS
1720 else
1721 /* We can't grok this as a comment; scan it normally. */
1722 from = comment_end;
1723 }
1724 }
195d1361 1725#endif /* 0 */
7fc8191e
RS
1726 /* We have skipped one comment. */
1727 break;
b3cfe0c8 1728 }
195d1361 1729 else if (code != Swhitespace && code != Scomment)
b3cfe0c8 1730 {
195d1361 1731 leave:
b3cfe0c8 1732 immediate_quit = 0;
93da5fff
KH
1733 INC_POS (from);
1734 SET_PT (from);
b3cfe0c8
RS
1735 return Qnil;
1736 }
1737 }
1738
840f481c 1739 count1++;
b3cfe0c8
RS
1740 }
1741
1742 SET_PT (from);
1743 immediate_quit = 0;
1744 return Qt;
1745}
1746\f
8489eb67
RS
1747Lisp_Object
1748scan_lists (from, count, depth, sexpflag)
1749 register int from;
1750 int count, depth, sexpflag;
1751{
1752 Lisp_Object val;
195d1361 1753 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
1754 register int c, c1;
1755 int stringterm;
8489eb67
RS
1756 int quoted;
1757 int mathexit = 0;
93da5fff 1758 register enum syntaxcode code, temp_code;
195d1361 1759 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 1760 int comstyle = 0; /* style of comment encountered */
93da5fff 1761 int temp_pos;
7bf5e9e4 1762 int last_good = from;
195d1361 1763 int found;
8489eb67
RS
1764
1765 if (depth > 0) min_depth = 0;
1766
1767 immediate_quit = 1;
1768 QUIT;
1769
195d1361 1770 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
1771 while (count > 0)
1772 {
8489eb67
RS
1773 while (from < stop)
1774 {
195d1361 1775 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67 1776 c = FETCH_CHAR (from);
e5d4f4dc 1777 code = SYNTAX (c);
7bf5e9e4
RS
1778 if (depth == min_depth)
1779 last_good = from;
93da5fff 1780 INC_POS (from);
195d1361 1781 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
1782 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1783 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1784 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1785 {
1786 /* we have encountered a comment start sequence and we
195d1361 1787 are ignoring all text inside comments. We must record
e5d4f4dc
RS
1788 the comment style this sequence begins so that later,
1789 only a comment end of the same style actually ends
1790 the comment section */
1791 code = Scomment;
1792 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
93da5fff 1793 INC_POS (from);
e5d4f4dc
RS
1794 }
1795
195d1361 1796 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
1797 if (SYNTAX_PREFIX (c))
1798 continue;
1799
0220c518 1800 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1801 {
1802 case Sescape:
1803 case Scharquote:
1804 if (from == stop) goto lose;
93da5fff 1805 INC_POS (from);
8489eb67
RS
1806 /* treat following character as a word constituent */
1807 case Sword:
1808 case Ssymbol:
1809 if (depth || !sexpflag) break;
195d1361 1810 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
1811 while (from < stop)
1812 {
195d1361 1813 UPDATE_SYNTAX_TABLE_FORWARD (from);
0220c518 1814 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1815 {
1816 case Scharquote:
1817 case Sescape:
93da5fff 1818 INC_POS (from);
8489eb67
RS
1819 if (from == stop) goto lose;
1820 break;
1821 case Sword:
1822 case Ssymbol:
1823 case Squote:
1824 break;
1825 default:
1826 goto done;
1827 }
93da5fff 1828 INC_POS (from);
8489eb67
RS
1829 }
1830 goto done;
1831
1832 case Scomment:
195d1361 1833 case Scomment_fence:
8489eb67
RS
1834 if (!parse_sexp_ignore_comments) break;
1835 while (1)
1836 {
c01f7ec0
RS
1837 if (from == stop)
1838 {
1839 if (depth == 0)
1840 goto done;
1841 goto lose;
1842 }
195d1361 1843 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 1844 c = FETCH_CHAR (from);
195d1361
RS
1845 if (code == Scomment
1846 ? (SYNTAX (c) == Sendcomment
1847 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1848 : (SYNTAX (c) == Scomment_fence))
e5d4f4dc
RS
1849 /* we have encountered a comment end of the same style
1850 as the comment sequence which began this comment
1851 section */
8489eb67 1852 break;
93da5fff 1853 INC_POS (from);
8489eb67 1854 if (from < stop && SYNTAX_COMEND_FIRST (c)
e5d4f4dc 1855 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
195d1361
RS
1856 && SYNTAX_COMMENT_STYLE (c) == comstyle
1857 && code == Scomment)
e5d4f4dc
RS
1858 /* we have encountered a comment end of the same style
1859 as the comment sequence which began this comment
1860 section */
93da5fff 1861 { INC_POS (from); break; }
8489eb67
RS
1862 }
1863 break;
1864
1865 case Smath:
1866 if (!sexpflag)
1867 break;
1868 if (from != stop && c == FETCH_CHAR (from))
93da5fff 1869 INC_POS (from);
8489eb67
RS
1870 if (mathexit)
1871 {
1872 mathexit = 0;
1873 goto close1;
1874 }
1875 mathexit = 1;
1876
1877 case Sopen:
1878 if (!++depth) goto done;
1879 break;
1880
1881 case Sclose:
1882 close1:
1883 if (!--depth) goto done;
1884 if (depth < min_depth)
7bf5e9e4
RS
1885 Fsignal (Qscan_error,
1886 Fcons (build_string ("Containing expression ends prematurely"),
1887 Fcons (make_number (last_good),
1888 Fcons (make_number (from), Qnil))));
8489eb67
RS
1889 break;
1890
1891 case Sstring:
195d1361 1892 case Sstring_fence:
93da5fff
KH
1893 temp_pos = from;
1894 DEC_POS (temp_pos);
1895 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
1896 while (1)
1897 {
1898 if (from >= stop) goto lose;
195d1361
RS
1899 UPDATE_SYNTAX_TABLE_FORWARD (from);
1900 if (code == Sstring
1901 ? (FETCH_CHAR (from) == stringterm)
1902 : SYNTAX (FETCH_CHAR (from)) == Sstring_fence)
1903 break;
0220c518 1904 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
1905 {
1906 case Scharquote:
1907 case Sescape:
93da5fff 1908 INC_POS (from);
8489eb67 1909 }
93da5fff 1910 INC_POS (from);
8489eb67 1911 }
93da5fff 1912 INC_POS (from);
8489eb67
RS
1913 if (!depth && sexpflag) goto done;
1914 break;
1915 }
1916 }
1917
1918 /* Reached end of buffer. Error if within object, return nil if between */
1919 if (depth) goto lose;
1920
1921 immediate_quit = 0;
1922 return Qnil;
1923
1924 /* End of object reached */
1925 done:
1926 count--;
1927 }
1928
1929 while (count < 0)
1930 {
8489eb67
RS
1931 while (from > stop)
1932 {
93da5fff 1933 DEC_POS (from);
195d1361 1934 UPDATE_SYNTAX_TABLE_BACKWARD (from);
8489eb67 1935 if (quoted = char_quoted (from))
195d1361
RS
1936 {
1937 DEC_POS (from);
1938 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1939 }
8489eb67
RS
1940 c = FETCH_CHAR (from);
1941 code = SYNTAX (c);
7bf5e9e4
RS
1942 if (depth == min_depth)
1943 last_good = from;
7fc8191e
RS
1944 comstyle = 0;
1945 if (code == Sendcomment)
1946 comstyle = SYNTAX_COMMENT_STYLE (c);
93da5fff
KH
1947 temp_pos = from;
1948 DEC_POS (temp_pos);
8489eb67 1949 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff
KH
1950 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1951 && !char_quoted (temp_pos)
8489eb67 1952 && parse_sexp_ignore_comments)
e5d4f4dc
RS
1953 {
1954 /* we must record the comment style encountered so that
1955 later, we can match only the proper comment begin
1956 sequence of the same style */
1957 code = Sendcomment;
93da5fff
KH
1958 comstyle = SYNTAX_COMMENT_STYLE (c1);
1959 from = temp_pos;
e5d4f4dc
RS
1960 }
1961
8489eb67
RS
1962 if (SYNTAX_PREFIX (c))
1963 continue;
1964
0220c518 1965 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
8489eb67
RS
1966 {
1967 case Sword:
1968 case Ssymbol:
1969 if (depth || !sexpflag) break;
195d1361
RS
1970 /* This word counts as a sexp; count object finished
1971 after passing it. */
8489eb67
RS
1972 while (from > stop)
1973 {
93da5fff
KH
1974 temp_pos = from;
1975 DEC_POS (temp_pos);
195d1361 1976 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff 1977 quoted = char_quoted (temp_pos);
8489eb67 1978 if (quoted)
93da5fff
KH
1979 {
1980 from = temp_pos;
1981 DEC_POS (temp_pos);
195d1361 1982 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff
KH
1983 }
1984 c1 = FETCH_CHAR (temp_pos);
1985 temp_code = SYNTAX (c1);
1986 if (! (quoted || temp_code == Sword
1987 || temp_code == Ssymbol
1988 || temp_code == Squote))
8489eb67 1989 goto done2;
93da5fff 1990 from = temp_pos;
8489eb67
RS
1991 }
1992 goto done2;
1993
1994 case Smath:
1995 if (!sexpflag)
1996 break;
93da5fff
KH
1997 temp_pos = from;
1998 DEC_POS (temp_pos);
195d1361 1999 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff
KH
2000 if (from != stop && c == FETCH_CHAR (temp_pos))
2001 from = temp_pos;
8489eb67
RS
2002 if (mathexit)
2003 {
2004 mathexit = 0;
2005 goto open2;
2006 }
2007 mathexit = 1;
2008
2009 case Sclose:
2010 if (!++depth) goto done2;
2011 break;
2012
2013 case Sopen:
2014 open2:
2015 if (!--depth) goto done2;
2016 if (depth < min_depth)
7bf5e9e4
RS
2017 Fsignal (Qscan_error,
2018 Fcons (build_string ("Containing expression ends prematurely"),
2019 Fcons (make_number (last_good),
2020 Fcons (make_number (from), Qnil))));
8489eb67
RS
2021 break;
2022
2023 case Sendcomment:
2024 if (!parse_sexp_ignore_comments)
2025 break;
8f9dc2ed 2026#if 0
37bef230
RS
2027 if (code != SYNTAX (c))
2028 /* For a two-char comment ender, we can assume
2029 it does end a comment. So scan back in a simple way. */
2030 {
93da5fff 2031 if (from != stop) DEC_POS (from);
37bef230
RS
2032 while (1)
2033 {
2034 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
2035 && SYNTAX_COMMENT_STYLE (c) == comstyle)
2036 break;
c01f7ec0
RS
2037 if (from == stop)
2038 {
2039 if (depth == 0)
2040 goto done2;
2041 goto lose;
2042 }
93da5fff 2043 DEC_POS (from);
37bef230
RS
2044 if (SYNTAX_COMSTART_SECOND (c)
2045 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
2046 && SYNTAX_COMMENT_STYLE (c) == comstyle
2047 && !char_quoted (from))
2048 break;
2049 }
2050 break;
2051 }
8f9dc2ed 2052#endif /* 0 */
ea315ed6 2053 found = back_comment (from, stop, comstyle);
195d1361
RS
2054 if (found != -1) from = found;
2055#if 0
8489eb67
RS
2056 /* Look back, counting the parity of string-quotes,
2057 and recording the comment-starters seen.
2058 When we reach a safe place, assume that's not in a string;
2059 then step the main scan to the earliest comment-starter seen
2060 an even number of string quotes away from the safe place.
2061
2062 OFROM[I] is position of the earliest comment-starter seen
2063 which is I+2X quotes from the comment-end.
2064 PARITY is current parity of quotes from the comment end. */
2065 {
8489eb67 2066 int parity = 0;
e5d4f4dc
RS
2067 char my_stringend = 0;
2068 int string_lossage = 0;
2069 int comment_end = from;
37bef230
RS
2070 int comstart_pos = 0;
2071 int comstart_parity = 0;
93da5fff
KH
2072 int scanstart = from;
2073
2074 DEC_POS (scanstart);
8489eb67
RS
2075
2076 /* At beginning of range to scan, we're outside of strings;
2077 that determines quote parity to the comment-end. */
2078 while (from != stop)
2079 {
2080 /* Move back and examine a character. */
93da5fff 2081 DEC_POS (from);
8489eb67
RS
2082
2083 c = FETCH_CHAR (from);
2084 code = SYNTAX (c);
2085
2086 /* If this char is the second of a 2-char comment sequence,
2087 back up and give the pair the appropriate syntax. */
93da5fff
KH
2088 temp_pos = from;
2089 DEC_POS (temp_pos);
8489eb67 2090 if (from > stop && SYNTAX_COMEND_SECOND (c)
93da5fff
KH
2091 && (c1 = FETCH_CHAR (temp_pos),
2092 SYNTAX_COMEND_FIRST (c1)))
e5d4f4dc
RS
2093 {
2094 code = Sendcomment;
93da5fff
KH
2095 from = temp_pos;
2096 c = c1;
e5d4f4dc
RS
2097 }
2098
4841063f
RS
2099 /* If this char starts a 2-char comment start sequence,
2100 treat it like a 1-char comment starter. */
93da5fff
KH
2101 temp_pos = from;
2102 INC_POS (temp_pos);
4841063f 2103 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
93da5fff
KH
2104 && (c1 = FETCH_CHAR (temp_pos),
2105 SYNTAX_COMSTART_SECOND (c1))
2106 && comstyle == SYNTAX_COMMENT_STYLE (c1))
4841063f 2107 code = Scomment;
8489eb67
RS
2108
2109 /* Ignore escaped characters. */
2110 if (char_quoted (from))
2111 continue;
2112
37bef230 2113 /* Track parity of quotes. */
8489eb67 2114 if (code == Sstring)
e5d4f4dc
RS
2115 {
2116 parity ^= 1;
2117 if (my_stringend == 0)
2118 my_stringend = c;
37bef230 2119 /* If we have two kinds of string delimiters.
e5d4f4dc
RS
2120 There's no way to grok this scanning backwards. */
2121 else if (my_stringend != c)
2122 string_lossage = 1;
2123 }
8489eb67
RS
2124
2125 /* Record comment-starters according to that
2126 quote-parity to the comment-end. */
2127 if (code == Scomment)
37bef230
RS
2128 {
2129 comstart_parity = parity;
2130 comstart_pos = from;
2131 }
8489eb67 2132
37bef230 2133 /* If we find another earlier comment-ender,
eb8c3be9 2134 any comment-starts earlier than that don't count
37bef230 2135 (because they go with the earlier comment-ender). */
e5d4f4dc
RS
2136 if (code == Sendcomment
2137 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
8489eb67 2138 break;
37bef230
RS
2139
2140 /* Assume a defun-start point is outside of strings. */
2141 if (code == Sopen
93da5fff 2142 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
37bef230 2143 break;
8489eb67 2144 }
37bef230
RS
2145
2146 if (comstart_pos == 0)
2147 from = comment_end;
2148 /* If the earliest comment starter
2149 is followed by uniform paired string quotes or none,
2150 we know it can't be inside a string
2151 since if it were then the comment ender would be inside one.
2152 So it does start a comment. Skip back to it. */
2153 else if (comstart_parity == 0 && !string_lossage)
2154 from = comstart_pos;
2155 else
e5d4f4dc
RS
2156 {
2157 /* We had two kinds of string delimiters mixed up
2158 together. Decode this going forwards.
2159 Scan fwd from the previous comment ender
2160 to the one in question; this records where we
2161 last passed a comment starter. */
2162 struct lisp_parse_state state;
37bef230 2163 scan_sexps_forward (&state, find_defun_start (comment_end),
340f92b5 2164 comment_end - 1, -10000, 0, Qnil, 0);
e5d4f4dc 2165 if (state.incomment)
195d1361 2166 from = state.comstr_start;
e5d4f4dc
RS
2167 else
2168 /* We can't grok this as a comment; scan it normally. */
2169 from = comment_end;
2170 }
8489eb67 2171 }
195d1361 2172#endif /* 0 */
8489eb67
RS
2173 break;
2174
195d1361
RS
2175 case Scomment_fence:
2176 case Sstring_fence:
2177 while (1)
2178 {
2179 DEC_POS (from);
2180 if (from == stop) goto lose;
2181 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2182 if (!char_quoted (from)
2183 && SYNTAX (FETCH_CHAR (from)) == code)
2184 break;
2185 }
2186 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2187 break;
2188
8489eb67
RS
2189 case Sstring:
2190 stringterm = FETCH_CHAR (from);
2191 while (1)
2192 {
2193 if (from == stop) goto lose;
93da5fff
KH
2194 temp_pos = from;
2195 DEC_POS (temp_pos);
195d1361 2196 UPDATE_SYNTAX_TABLE_BACKWARD (temp_pos);
93da5fff
KH
2197 if (!char_quoted (temp_pos)
2198 && stringterm == FETCH_CHAR (temp_pos))
8489eb67 2199 break;
93da5fff 2200 from = temp_pos;
8489eb67 2201 }
93da5fff 2202 DEC_POS (from);
8489eb67
RS
2203 if (!depth && sexpflag) goto done2;
2204 break;
2205 }
2206 }
2207
2208 /* Reached start of buffer. Error if within object, return nil if between */
2209 if (depth) goto lose;
2210
2211 immediate_quit = 0;
2212 return Qnil;
2213
2214 done2:
2215 count++;
2216 }
2217
2218
2219 immediate_quit = 0;
1e142fb7 2220 XSETFASTINT (val, from);
8489eb67
RS
2221 return val;
2222
2223 lose:
7bf5e9e4
RS
2224 Fsignal (Qscan_error,
2225 Fcons (build_string ("Unbalanced parentheses"),
2226 Fcons (make_number (last_good),
2227 Fcons (make_number (from), Qnil))));
2228
8489eb67
RS
2229 /* NOTREACHED */
2230}
2231
8489eb67
RS
2232DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2233 "Scan from character number FROM by COUNT lists.\n\
2234Returns the character number of the position thus found.\n\
2235\n\
2236If DEPTH is nonzero, paren depth begins counting from that value,\n\
2237only places where the depth in parentheses becomes zero\n\
2238are candidates for stopping; COUNT such places are counted.\n\
2239Thus, a positive value for DEPTH means go out levels.\n\
2240\n\
2241Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2242\n\
2243If the beginning or end of (the accessible part of) the buffer is reached\n\
2244and the depth is wrong, an error is signaled.\n\
2245If the depth is right but the count is not used up, nil is returned.")
2246 (from, count, depth)
2247 Lisp_Object from, count, depth;
2248{
2249 CHECK_NUMBER (from, 0);
2250 CHECK_NUMBER (count, 1);
2251 CHECK_NUMBER (depth, 2);
2252
2253 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2254}
2255
2256DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2257 "Scan from character number FROM by COUNT balanced expressions.\n\
2258If COUNT is negative, scan backwards.\n\
2259Returns the character number of the position thus found.\n\
2260\n\
2261Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2262\n\
2263If the beginning or end of (the accessible part of) the buffer is reached\n\
2264in the middle of a parenthetical grouping, an error is signaled.\n\
2265If the beginning or end is reached between groupings\n\
2266but before count is used up, nil is returned.")
2267 (from, count)
2268 Lisp_Object from, count;
2269{
2270 CHECK_NUMBER (from, 0);
2271 CHECK_NUMBER (count, 1);
2272
2273 return scan_lists (XINT (from), XINT (count), 0, 1);
2274}
2275
2276DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2277 0, 0, 0,
2278 "Move point backward over any number of chars with prefix syntax.\n\
2279This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2280 ()
2281{
2282 int beg = BEGV;
6ec8bbd2 2283 int pos = PT;
93da5fff
KH
2284 int c;
2285 int temp_pos = pos;
2286
195d1361
RS
2287 if (pos > beg)
2288 {
2289 SETUP_SYNTAX_TABLE (pos, -1);
2290 }
93da5fff 2291 DEC_POS (temp_pos);
8489eb67 2292
93da5fff 2293 while (pos > beg && !char_quoted (temp_pos)
195d1361 2294 /* Previous statement updates syntax table. */
93da5fff
KH
2295 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
2296 || SYNTAX_PREFIX (c)))
2297 {
2298 pos = temp_pos;
2299 DEC_POS (temp_pos);
2300 }
8489eb67
RS
2301
2302 SET_PT (pos);
2303
2304 return Qnil;
2305}
2306\f
8489eb67 2307/* Parse forward from FROM to END,
e5d4f4dc
RS
2308 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2309 and return a description of the state of the parse at END.
c81a3712 2310 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2311 If COMMENTSTOP is 1, stop at the start of a comment.
2312 If COMMENTSTOP is -1, stop at the start or end of a comment,
2313 after the beginning of a string, or after the end of a string. */
8489eb67 2314
340f92b5 2315static void
c81a3712
RS
2316scan_sexps_forward (stateptr, from, end, targetdepth,
2317 stopbefore, oldstate, commentstop)
e5d4f4dc 2318 struct lisp_parse_state *stateptr;
8489eb67
RS
2319 register int from;
2320 int end, targetdepth, stopbefore;
2321 Lisp_Object oldstate;
c81a3712 2322 int commentstop;
8489eb67
RS
2323{
2324 struct lisp_parse_state state;
2325
2326 register enum syntaxcode code;
2327 struct level { int last, prev; };
2328 struct level levelstart[100];
2329 register struct level *curlevel = levelstart;
2330 struct level *endlevel = levelstart + 100;
93da5fff 2331 int prev;
8489eb67
RS
2332 register int depth; /* Paren depth of current scanning location.
2333 level - levelstart equals this except
2334 when the depth becomes negative. */
2335 int mindepth; /* Lowest DEPTH value seen. */
2336 int start_quoted = 0; /* Nonzero means starting after a char quote */
2337 Lisp_Object tem;
93da5fff 2338 int prev_from; /* Keep one character before FROM. */
195d1361
RS
2339 int boundary_stop = commentstop == -1;
2340 int nofence;
93da5fff
KH
2341
2342 prev_from = from;
2343 DEC_POS (prev_from);
2344
2345 /* Use this macro instead of `from++'. */
2346#define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
8489eb67
RS
2347
2348 immediate_quit = 1;
2349 QUIT;
2350
195d1361
RS
2351 SETUP_SYNTAX_TABLE (from, 1);
2352
265a9e55 2353 if (NILP (oldstate))
8489eb67
RS
2354 {
2355 depth = 0;
2356 state.instring = -1;
2357 state.incomment = 0;
195d1361
RS
2358 state.comstyle = 0; /* comment style a by default. */
2359 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2360 }
2361 else
2362 {
2363 tem = Fcar (oldstate);
265a9e55 2364 if (!NILP (tem))
8489eb67
RS
2365 depth = XINT (tem);
2366 else
2367 depth = 0;
2368
2369 oldstate = Fcdr (oldstate);
2370 oldstate = Fcdr (oldstate);
2371 oldstate = Fcdr (oldstate);
2372 tem = Fcar (oldstate);
195d1361
RS
2373 /* Check whether we are inside string_fence-style string: */
2374 state.instring = ( !NILP (tem)
2375 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2376 : -1);
8489eb67
RS
2377
2378 oldstate = Fcdr (oldstate);
2379 tem = Fcar (oldstate);
265a9e55 2380 state.incomment = !NILP (tem);
8489eb67
RS
2381
2382 oldstate = Fcdr (oldstate);
2383 tem = Fcar (oldstate);
265a9e55 2384 start_quoted = !NILP (tem);
e5d4f4dc
RS
2385
2386 /* if the eight element of the list is nil, we are in comment
195d1361
RS
2387 style a. If it is non-nil, we are in comment style b */
2388 oldstate = Fcdr (oldstate);
e5d4f4dc 2389 oldstate = Fcdr (oldstate);
195d1361
RS
2390 tem = Fcar (oldstate);
2391 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2392 ? ST_COMMENT_STYLE : 1 );
2393
e5d4f4dc 2394 oldstate = Fcdr (oldstate);
e5d4f4dc 2395 tem = Fcar (oldstate);
195d1361 2396 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
8489eb67
RS
2397 }
2398 state.quoted = 0;
2399 mindepth = depth;
2400
2401 curlevel->prev = -1;
2402 curlevel->last = -1;
2403
195d1361 2404 /* Enter the loop at a place appropriate for initial state. */
8489eb67
RS
2405
2406 if (state.incomment) goto startincomment;
2407 if (state.instring >= 0)
2408 {
195d1361 2409 nofence = state.instring != ST_STRING_STYLE;
8489eb67
RS
2410 if (start_quoted) goto startquotedinstring;
2411 goto startinstring;
2412 }
2413 if (start_quoted) goto startquoted;
2414
2415 while (from < end)
2416 {
195d1361 2417 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2418 code = SYNTAX (FETCH_CHAR (from));
93da5fff 2419 INC_FROM;
4c920633 2420
8f9dc2ed 2421 if (code == Scomment)
195d1361 2422 state.comstr_start = prev_from;
4c920633 2423 else if (code == Scomment_fence)
e5d4f4dc
RS
2424 {
2425 /* Record the comment style we have entered so that only
2426 the comment-end sequence of the same style actually
2427 terminates the comment section. */
195d1361
RS
2428 state.comstyle = ( code == Scomment_fence
2429 ? ST_COMMENT_STYLE
2430 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2431 state.comstr_start = prev_from;
2432 if (code != Scomment_fence) INC_FROM;
e5d4f4dc 2433 code = Scomment;
e5d4f4dc 2434 }
4c920633
RS
2435 else if (from < end)
2436 if (SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from)))
2437 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
2438 /* Duplicate code to avoid a very complex if-expression
2439 which causes trouble for the SGI compiler. */
2440 {
2441 /* Record the comment style we have entered so that only
2442 the comment-end sequence of the same style actually
2443 terminates the comment section. */
2444 state.comstyle = ( code == Scomment_fence
2445 ? ST_COMMENT_STYLE
2446 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)));
2447 state.comstr_start = prev_from;
2448 if (code != Scomment_fence) INC_FROM;
2449 code = Scomment;
2450 }
e5d4f4dc 2451
93da5fff 2452 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
8489eb67 2453 continue;
0220c518 2454 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2455 {
2456 case Sescape:
2457 case Scharquote:
2458 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2459 curlevel->last = prev_from;
8489eb67
RS
2460 startquoted:
2461 if (from == end) goto endquoted;
93da5fff 2462 INC_FROM;
8489eb67
RS
2463 goto symstarted;
2464 /* treat following character as a word constituent */
2465 case Sword:
2466 case Ssymbol:
2467 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2468 curlevel->last = prev_from;
8489eb67
RS
2469 symstarted:
2470 while (from < end)
2471 {
195d1361 2472 UPDATE_SYNTAX_TABLE_FORWARD (from);
0220c518 2473 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
8489eb67
RS
2474 {
2475 case Scharquote:
2476 case Sescape:
93da5fff 2477 INC_FROM;
8489eb67
RS
2478 if (from == end) goto endquoted;
2479 break;
2480 case Sword:
2481 case Ssymbol:
2482 case Squote:
2483 break;
2484 default:
2485 goto symdone;
2486 }
93da5fff 2487 INC_FROM;
8489eb67
RS
2488 }
2489 symdone:
2490 curlevel->prev = curlevel->last;
2491 break;
2492
5a28e48c 2493 startincomment:
195d1361 2494 if (commentstop == 1)
5a28e48c
RS
2495 goto done;
2496 if (from != BEGV)
2497 {
2498 /* Enter the loop in the middle so that we find
2499 a 2-char comment ender if we start in the middle of it. */
93da5fff 2500 prev = FETCH_CHAR (prev_from);
5a28e48c
RS
2501 goto startincomment_1;
2502 }
2503 /* At beginning of buffer, enter the loop the ordinary way. */
195d1361
RS
2504 state.incomment = 1;
2505 goto commentloop;
5a28e48c 2506
8489eb67
RS
2507 case Scomment:
2508 state.incomment = 1;
195d1361
RS
2509 if (commentstop || boundary_stop) goto done;
2510 commentloop:
8489eb67
RS
2511 while (1)
2512 {
2513 if (from == end) goto done;
195d1361 2514 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc
RS
2515 prev = FETCH_CHAR (from);
2516 if (SYNTAX (prev) == Sendcomment
2517 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2518 /* Only terminate the comment section if the endcomment
2519 of the same style as the start sequence has been
2520 encountered. */
8489eb67 2521 break;
195d1361
RS
2522 if (state.comstyle == ST_COMMENT_STYLE
2523 && SYNTAX (prev) == Scomment_fence)
2524 break;
93da5fff 2525 INC_FROM;
5a28e48c 2526 startincomment_1:
8489eb67 2527 if (from < end && SYNTAX_COMEND_FIRST (prev)
e5d4f4dc
RS
2528 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
2529 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
2530 /* Only terminate the comment section if the end-comment
2531 sequence of the same style as the start sequence has
2532 been encountered. */
195d1361 2533 { break; }
8489eb67 2534 }
195d1361 2535 INC_FROM;
8489eb67 2536 state.incomment = 0;
e5d4f4dc 2537 state.comstyle = 0; /* reset the comment style */
195d1361 2538 if (boundary_stop) goto done;
8489eb67
RS
2539 break;
2540
2541 case Sopen:
2542 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2543 depth++;
2544 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2545 curlevel->last = prev_from;
8489eb67
RS
2546 if (++curlevel == endlevel)
2547 error ("Nesting too deep for parser");
2548 curlevel->prev = -1;
2549 curlevel->last = -1;
30844415 2550 if (targetdepth == depth) goto done;
8489eb67
RS
2551 break;
2552
2553 case Sclose:
2554 depth--;
2555 if (depth < mindepth)
2556 mindepth = depth;
2557 if (curlevel != levelstart)
2558 curlevel--;
2559 curlevel->prev = curlevel->last;
30844415 2560 if (targetdepth == depth) goto done;
8489eb67
RS
2561 break;
2562
2563 case Sstring:
195d1361
RS
2564 case Sstring_fence:
2565 state.comstr_start = from - 1;
8489eb67 2566 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2567 curlevel->last = prev_from;
195d1361
RS
2568 state.instring = (code == Sstring
2569 ? (FETCH_CHAR (prev_from))
2570 : ST_STRING_STYLE);
2571 if (boundary_stop) goto done;
8489eb67 2572 startinstring:
195d1361 2573 {
644ea4df 2574 nofence = state.instring != ST_STRING_STYLE;
195d1361 2575
644ea4df
RS
2576 while (1)
2577 {
2578 int c;
195d1361 2579
644ea4df
RS
2580 if (from >= end) goto done;
2581 c = FETCH_CHAR (from);
2582 if (nofence && c == state.instring) break;
2583 UPDATE_SYNTAX_TABLE_FORWARD (from);
2584 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
2585 {
2586 case Sstring_fence:
2587 if (!nofence) goto string_end;
2588 break;
2589 case Scharquote:
2590 case Sescape:
2591 INC_FROM;
2592 startquotedinstring:
2593 if (from >= end) goto endquoted;
195d1361 2594 }
644ea4df
RS
2595 INC_FROM;
2596 }
195d1361
RS
2597 }
2598 string_end:
8489eb67
RS
2599 state.instring = -1;
2600 curlevel->prev = curlevel->last;
93da5fff 2601 INC_FROM;
195d1361 2602 if (boundary_stop) goto done;
8489eb67
RS
2603 break;
2604
2605 case Smath:
2606 break;
2607 }
2608 }
2609 goto done;
2610
2611 stop: /* Here if stopping before start of sexp. */
93da5fff 2612 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2613 goto done; /* but return the position before it. */
2614
2615 endquoted:
2616 state.quoted = 1;
2617 done:
2618 state.depth = depth;
2619 state.mindepth = mindepth;
2620 state.thislevelstart = curlevel->prev;
2621 state.prevlevelstart
2622 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2623 state.location = from;
2624 immediate_quit = 0;
2625
e5d4f4dc 2626 *stateptr = state;
8489eb67
RS
2627}
2628
2629/* This comment supplies the doc string for parse-partial-sexp,
2630 for make-docfile to see. We cannot put this in the real DEFUN
2631 due to limits in the Unix cpp.
2632
c81a3712 2633DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
2634 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2635Parsing stops at TO or when certain criteria are met;\n\
2636 point is set to where parsing stops.\n\
2637If fifth arg STATE is omitted or nil,\n\
2638 parsing assumes that FROM is the beginning of a function.\n\
195d1361 2639Value is a list of nine elements describing final state of parsing:\n\
af50f9e5
RS
2640 0. depth in parens.\n\
2641 1. character address of start of innermost containing list; nil if none.\n\
2642 2. character address of start of last complete sexp terminated.\n\
2643 3. non-nil if inside a string.\n\
195d1361 2644 (it is the character that will terminate the string,\n\
bff37d2a 2645 or t if the string should be terminated by a generic string delimiter.)\n\
af50f9e5
RS
2646 4. t if inside a comment.\n\
2647 5. t if following a quote character.\n\
2648 6. the minimum paren-depth encountered during this scan.\n\
bff37d2a
RS
2649 7. t if in a comment of style b; `syntax-table' if the comment\n\
2650 should be terminated by a generic comment delimiter.\n\
2651 8. character address of start of comment or string; nil if not in one.\n\
8489eb67
RS
2652If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2653in parentheses becomes equal to TARGETDEPTH.\n\
2654Fourth arg STOPBEFORE non-nil means stop when come to\n\
2655 any character that starts a sexp.\n\
bff37d2a
RS
2656Fifth arg STATE is a nine-element list like what this function returns.\n\
2657 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2658 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2659Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
2660 If it is `syntax-table', stop after the start of a comment or a string,\n\
2661 or after end of a comment or a string.")
c81a3712 2662 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
2663*/
2664
c81a3712 2665DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 2666 0 /* See immediately above */)
c81a3712
RS
2667 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2668 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
2669{
2670 struct lisp_parse_state state;
2671 int target;
2672
265a9e55 2673 if (!NILP (targetdepth))
8489eb67
RS
2674 {
2675 CHECK_NUMBER (targetdepth, 3);
2676 target = XINT (targetdepth);
2677 }
2678 else
2679 target = -100000; /* We won't reach this depth */
2680
2681 validate_region (&from, &to);
e5d4f4dc 2682 scan_sexps_forward (&state, XINT (from), XINT (to),
c81a3712 2683 target, !NILP (stopbefore), oldstate,
195d1361
RS
2684 (NILP (commentstop)
2685 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
2686
2687 SET_PT (state.location);
2688
2689 return Fcons (make_number (state.depth),
2690 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2691 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
195d1361
RS
2692 Fcons (state.instring >= 0
2693 ? (state.instring == ST_STRING_STYLE
2694 ? Qt : make_number (state.instring)) : Qnil,
8489eb67
RS
2695 Fcons (state.incomment ? Qt : Qnil,
2696 Fcons (state.quoted ? Qt : Qnil,
bff37d2a
RS
2697 Fcons (make_number (state.mindepth),
2698 Fcons ((state.comstyle
2699 ? (state.comstyle == ST_COMMENT_STYLE
2700 ? Qsyntax_table : Qt) :
2701 Qnil),
2702 Fcons ((state.incomment || state.instring
2703 ? make_number (state.comstr_start)
2704 : Qnil),
2705 Qnil)))))))));
8489eb67
RS
2706}
2707\f
2708init_syntax_once ()
2709{
78f9a1f7 2710 register int i, c;
8ea151b2 2711 Lisp_Object temp;
8489eb67 2712
5ebaddf5
RS
2713 /* This has to be done here, before we call Fmake_char_table. */
2714 Qsyntax_table = intern ("syntax-table");
2715 staticpro (&Qsyntax_table);
2716
2717 /* Intern this now in case it isn't already done.
2718 Setting this variable twice is harmless.
2719 But don't staticpro it here--that is done in alloc.c. */
2720 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2721
93da5fff 2722 /* Create objects which can be shared among syntax tables. */
44d8a793 2723 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
93da5fff
KH
2724 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2725 XVECTOR (Vsyntax_code_object)->contents[i]
2726 = Fcons (make_number (i), Qnil);
2727
5ebaddf5
RS
2728 /* Now we are ready to set up this property, so we can
2729 create syntax tables. */
2730 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2731
93da5fff 2732 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 2733
5ebaddf5 2734 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 2735
93da5fff 2736 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 2737 for (i = 'a'; i <= 'z'; i++)
8ea151b2 2738 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2739 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 2740 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2741 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
2742 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2743
2744 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2745 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2746
2747 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2748 Fcons (make_number (Sopen), make_number (')')));
2749 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2750 Fcons (make_number (Sclose), make_number ('(')));
2751 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2752 Fcons (make_number (Sopen), make_number (']')));
2753 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2754 Fcons (make_number (Sclose), make_number ('[')));
2755 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2756 Fcons (make_number (Sopen), make_number ('}')));
2757 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2758 Fcons (make_number (Sclose), make_number ('{')));
2759 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2760 Fcons (make_number ((int) Sstring), Qnil));
2761 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2762 Fcons (make_number ((int) Sescape), Qnil));
2763
93da5fff 2764 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 2765 for (i = 0; i < 10; i++)
78f9a1f7
KH
2766 {
2767 c = "_-+*/&|<>="[i];
2768 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2769 }
8489eb67 2770
93da5fff 2771 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 2772 for (i = 0; i < 12; i++)
78f9a1f7
KH
2773 {
2774 c = ".,;:?!#@~^'`"[i];
2775 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2776 }
8489eb67
RS
2777}
2778
2779syms_of_syntax ()
2780{
2781 Qsyntax_table_p = intern ("syntax-table-p");
2782 staticpro (&Qsyntax_table_p);
2783
93da5fff
KH
2784 staticpro (&Vsyntax_code_object);
2785
7bf5e9e4
RS
2786 Qscan_error = intern ("scan-error");
2787 staticpro (&Qscan_error);
2788 Fput (Qscan_error, Qerror_conditions,
2789 Fcons (Qerror, Qnil));
2790 Fput (Qscan_error, Qerror_message,
2791 build_string ("Scan error"));
2792
8489eb67
RS
2793 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2794 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2795
195d1361
RS
2796 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2797 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2798The value of this property should be either a syntax table, or a cons\n\
2799of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2800syntax code, MATCHCHAR being nil or the character to match (which is\n\
2801relevant only for open/close type.");
2802
8489eb67
RS
2803 words_include_escapes = 0;
2804 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2805 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2806
2807 defsubr (&Ssyntax_table_p);
2808 defsubr (&Ssyntax_table);
2809 defsubr (&Sstandard_syntax_table);
2810 defsubr (&Scopy_syntax_table);
2811 defsubr (&Sset_syntax_table);
2812 defsubr (&Schar_syntax);
beefa22e 2813 defsubr (&Smatching_paren);
8489eb67
RS
2814 defsubr (&Smodify_syntax_entry);
2815 defsubr (&Sdescribe_syntax);
2816
2817 defsubr (&Sforward_word);
2818
195d1361
RS
2819 defsubr (&Sskip_chars_forward);
2820 defsubr (&Sskip_chars_backward);
2821 defsubr (&Sskip_syntax_forward);
2822 defsubr (&Sskip_syntax_backward);
2823
b3cfe0c8 2824 defsubr (&Sforward_comment);
8489eb67
RS
2825 defsubr (&Sscan_lists);
2826 defsubr (&Sscan_sexps);
2827 defsubr (&Sbackward_prefix_chars);
2828 defsubr (&Sparse_partial_sexp);
2829}