* lib-src/fakemail.c (action): Convert function definitions to standard C.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
429ab54e 2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
114f9c96 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
429ab54e 4 Free Software Foundation, Inc.
8489eb67
RS
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
8489eb67
RS
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67 23#include <ctype.h>
d7306fe6 24#include <setjmp.h>
8489eb67
RS
25#include "lisp.h"
26#include "commands.h"
27#include "buffer.h"
5c7b02ab 28#include "character.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361
RS
36#include "intervals.h"
37
38/* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
41
42#define ST_COMMENT_STYLE (256 + 1)
43#define ST_STRING_STYLE (256 + 2)
93da5fff 44#include "category.h"
8489eb67 45
7bf5e9e4 46Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
47
48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
bd25db08
KH
51/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52int multibyte_syntax_as_symbol;
53
8ea151b2
RS
54/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57Lisp_Object syntax_temp;
58
f4ed767f
GM
59/* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
62
63int open_paren_in_column_0_is_defun_start;
64
e5d4f4dc
RS
65/* This is the internal form of the parse state used in parse-partial-sexp. */
66
67struct lisp_parse_state
68 {
6449674e
SM
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
71 int incomment; /* -1 if in unnestable comment else comment nesting */
72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int mindepth; /* Minimum depth seen while scanning. */
75 /* Char number of most recent start-of-expression at current level */
76 EMACS_INT thislevelstart;
77 /* Char number of start of containing expression */
78 EMACS_INT prevlevelstart;
79 EMACS_INT location; /* Char number at which parsing stopped. */
80 EMACS_INT comstr_start; /* Position of last comment/string starter. */
81 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
82 of levels (starting from outermost). */
e5d4f4dc
RS
83 };
84\f
37bef230
RS
85/* These variables are a cache for finding the start of a defun.
86 find_start_pos is the place for which the defun start was found.
87 find_start_value is the defun start position found for it.
6a140a74 88 find_start_value_byte is the corresponding byte position.
37bef230
RS
89 find_start_buffer is the buffer it was found in.
90 find_start_begv is the BEGV value when it was found.
91 find_start_modiff is the value of MODIFF when it was found. */
92
8e2911c2
AS
93static EMACS_INT find_start_pos;
94static EMACS_INT find_start_value;
95static EMACS_INT find_start_value_byte;
37bef230 96static struct buffer *find_start_buffer;
8e2911c2 97static EMACS_INT find_start_begv;
37bef230 98static int find_start_modiff;
6a140a74
RS
99
100
f57e2426
J
101static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
102static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
103static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
104static void scan_sexps_forward (struct lisp_parse_state *,
105 EMACS_INT, EMACS_INT, EMACS_INT, int,
106 int, Lisp_Object, int);
107static int in_classes (int, Lisp_Object);
195d1361
RS
108\f
109
110struct gl_state_s gl_state; /* Global state of syntax parser. */
111
112INTERVAL interval_of ();
113#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
114 to scan to property-change. */
115
6a140a74
RS
116/* Update gl_state to an appropriate interval which contains CHARPOS. The
117 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 118 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 119 valid at start, the rest is filled basing on OBJECT.
195d1361 120
6a140a74 121 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
122 direction than the intervals - or in an interval. We update the
123 current syntax-table basing on the property of this interval, and
6a140a74 124 update the interval to start further than CHARPOS - or be
195d1361 125 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 126 charpos to call this subroutine again - or be before/after the
195d1361
RS
127 start/end of OBJECT. */
128
129void
6a140a74
RS
130update_syntax_table (charpos, count, init, object)
131 int charpos, count, init;
195d1361
RS
132 Lisp_Object object;
133{
134 Lisp_Object tmp_table;
4ffe723b 135 int cnt = 0, invalidate = 1;
5a774522 136 INTERVAL i;
195d1361
RS
137
138 if (init)
139 {
bb0de084 140 gl_state.old_prop = Qnil;
195d1361
RS
141 gl_state.start = gl_state.b_property;
142 gl_state.stop = gl_state.e_property;
bb0de084
SM
143 i = interval_of (charpos, object);
144 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
145 invalidate = 0;
146 if (NULL_INTERVAL_P (i))
147 return;
f902a008 148 /* interval_of updates only ->position of the return value, so
d80f4cc7 149 update the parents manually to speed up update_interval. */
bb0de084 150 while (!NULL_PARENT (i))
d80f4cc7
RS
151 {
152 if (AM_RIGHT_CHILD (i))
439d5cb4 153 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 154 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
155 - TOTAL_LENGTH (INTERVAL_PARENT (i))
156 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 157 else
439d5cb4 158 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 159 + TOTAL_LENGTH (i);
439d5cb4 160 i = INTERVAL_PARENT (i);
d80f4cc7
RS
161 }
162 i = gl_state.forward_i;
bb0de084 163 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 164 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
165 goto update;
166 }
5a774522 167 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 168
423e705d 169 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 170 or further off. */
195d1361
RS
171 if (NULL_INTERVAL_P (i))
172 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 173 else if (charpos < i->position) /* Move left. */
195d1361
RS
174 {
175 if (count > 0)
f902a008 176 error ("Error in syntax_table logic for intervals <-");
195d1361 177 /* Update the interval. */
6a140a74 178 i = update_interval (i, charpos);
bb0de084 179 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
180 {
181 invalidate = 0;
195d1361 182 gl_state.forward_i = i;
ee0cdb48 183 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 184 }
423e705d 185 }
6a140a74 186 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
187 {
188 if (count < 0)
f902a008 189 error ("Error in syntax_table logic for intervals ->");
195d1361 190 /* Update the interval. */
6a140a74 191 i = update_interval (i, charpos);
bb0de084 192 if (i->position != gl_state.e_property)
195d1361
RS
193 {
194 invalidate = 0;
195d1361 195 gl_state.backward_i = i;
bb0de084 196 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
197 }
198 }
195d1361
RS
199
200 update:
201 tmp_table = textget (i->plist, Qsyntax_table);
202
203 if (invalidate)
204 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 205
423e705d
SM
206 if (invalidate) /* Did not get to adjacent interval. */
207 { /* with the same table => */
208 /* invalidate the old range. */
195d1361
RS
209 if (count > 0)
210 {
211 gl_state.backward_i = i;
bb0de084
SM
212 gl_state.b_property = i->position - gl_state.offset;
213 }
214 else
195d1361 215 {
bb0de084 216 gl_state.forward_i = i;
ee0cdb48 217 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
218 }
219 }
37bef230 220
bb0de084 221 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 222 {
bb0de084
SM
223 gl_state.current_syntax_table = tmp_table;
224 gl_state.old_prop = tmp_table;
225 if (EQ (Fsyntax_table_p (tmp_table), Qt))
226 {
227 gl_state.use_global = 0;
7d0393cf 228 }
bb0de084
SM
229 else if (CONSP (tmp_table))
230 {
231 gl_state.use_global = 1;
232 gl_state.global_code = tmp_table;
233 }
7d0393cf 234 else
bb0de084
SM
235 {
236 gl_state.use_global = 0;
237 gl_state.current_syntax_table = current_buffer->syntax_table;
238 }
195d1361
RS
239 }
240
241 while (!NULL_INTERVAL_P (i))
242 {
243 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
244 {
245 if (count > 0)
bb0de084
SM
246 {
247 gl_state.e_property = i->position - gl_state.offset;
248 gl_state.forward_i = i;
249 }
250 else
251 {
5a774522
SM
252 gl_state.b_property
253 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
254 gl_state.backward_i = i;
255 }
256 return;
195d1361 257 }
7d0393cf 258 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
259 {
260 if (count > 0)
bb0de084 261 {
5a774522
SM
262 gl_state.e_property
263 = i->position + LENGTH (i) - gl_state.offset
264 /* e_property at EOB is not set to ZV but to ZV+1, so that
265 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
266 having to check eob between the two. */
267 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
268 gl_state.forward_i = i;
269 }
270 else
271 {
272 gl_state.b_property = i->position - gl_state.offset;
273 gl_state.backward_i = i;
274 }
275 return;
195d1361
RS
276 }
277 cnt++;
278 i = count > 0 ? next_interval (i) : previous_interval (i);
279 }
bb0de084
SM
280 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
281 if (count > 0)
282 gl_state.e_property = gl_state.stop;
283 else
284 gl_state.b_property = gl_state.start;
195d1361
RS
285}
286\f
6a140a74
RS
287/* Returns TRUE if char at CHARPOS is quoted.
288 Global syntax-table data should be set up already to be good at CHARPOS
289 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
290
291static int
6449674e 292char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
195d1361
RS
293{
294 register enum syntaxcode code;
6449674e 295 register EMACS_INT beg = BEGV;
195d1361 296 register int quoted = 0;
6449674e 297 EMACS_INT orig = charpos;
6a140a74 298
02d8b017 299 while (charpos > beg)
195d1361 300 {
ab229fdd 301 int c;
02d8b017 302 DEC_BOTH (charpos, bytepos);
ab229fdd 303
6a140a74 304 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 305 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 306 code = SYNTAX (c);
6a140a74
RS
307 if (! (code == Scharquote || code == Sescape))
308 break;
309
6a140a74 310 quoted = !quoted;
195d1361 311 }
6a140a74
RS
312
313 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
314 return quoted;
315}
6a140a74
RS
316
317/* Return the bytepos one character after BYTEPOS.
318 We assume that BYTEPOS is not at the end of the buffer. */
319
6449674e 320INLINE EMACS_INT
6a140a74 321inc_bytepos (bytepos)
6449674e 322 EMACS_INT bytepos;
6a140a74 323{
ef316cf0
RS
324 if (NILP (current_buffer->enable_multibyte_characters))
325 return bytepos + 1;
326
6a140a74
RS
327 INC_POS (bytepos);
328 return bytepos;
329}
330
331/* Return the bytepos one character before BYTEPOS.
332 We assume that BYTEPOS is not at the start of the buffer. */
333
6449674e 334INLINE EMACS_INT
6a140a74 335dec_bytepos (bytepos)
6449674e 336 EMACS_INT bytepos;
6a140a74 337{
ef316cf0
RS
338 if (NILP (current_buffer->enable_multibyte_characters))
339 return bytepos - 1;
340
6a140a74
RS
341 DEC_POS (bytepos);
342 return bytepos;
343}
195d1361 344\f
2b34df4e 345/* Return a defun-start position before POS and not too far before.
7cc80f0a
RS
346 It should be the last one before POS, or nearly the last.
347
348 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 349 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
350
351 We record the information about where the scan started
352 and what its result was, so that another call in the same area
353 can return the same value very quickly.
195d1361
RS
354
355 There is no promise at which position the global syntax data is
356 valid on return from the subroutine, so the caller should explicitly
357 update the global data. */
37bef230 358
6449674e 359static EMACS_INT
6a140a74 360find_defun_start (pos, pos_byte)
8e2911c2 361 EMACS_INT pos, pos_byte;
37bef230 362{
8e2911c2 363 EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
37bef230 364
1fd1cc2f
RS
365 if (!open_paren_in_column_0_is_defun_start)
366 {
367 find_start_value_byte = BEGV_BYTE;
368 return BEGV;
369 }
370
37bef230
RS
371 /* Use previous finding, if it's valid and applies to this inquiry. */
372 if (current_buffer == find_start_buffer
373 /* Reuse the defun-start even if POS is a little farther on.
374 POS might be in the next defun, but that's ok.
375 Our value may not be the best possible, but will still be usable. */
376 && pos <= find_start_pos + 1000
377 && pos >= find_start_value
378 && BEGV == find_start_begv
379 && MODIFF == find_start_modiff)
380 return find_start_value;
381
382 /* Back up to start of line. */
6a140a74 383 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 384
195d1361
RS
385 /* We optimize syntax-table lookup for rare updates. Thus we accept
386 only those `^\s(' which are good in global _and_ text-property
387 syntax-tables. */
d48cd3f4 388 SETUP_BUFFER_SYNTAX_TABLE ();
1fd1cc2f 389 while (PT > BEGV)
37bef230 390 {
ab229fdd
AS
391 int c;
392
1fd1cc2f
RS
393 /* Open-paren at start of line means we may have found our
394 defun-start. */
327719ee 395 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 396 if (SYNTAX (c) == Sopen)
195d1361 397 {
1fd1cc2f 398 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 399 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 400 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
401 break;
402 /* Now fallback to the default value. */
d48cd3f4 403 SETUP_BUFFER_SYNTAX_TABLE ();
195d1361 404 }
1fd1cc2f
RS
405 /* Move to beg of previous line. */
406 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
407 }
408
409 /* Record what we found, for the next try. */
6a140a74
RS
410 find_start_value = PT;
411 find_start_value_byte = PT_BYTE;
37bef230
RS
412 find_start_buffer = current_buffer;
413 find_start_modiff = MODIFF;
414 find_start_begv = BEGV;
415 find_start_pos = pos;
416
6a140a74
RS
417 TEMP_SET_PT_BOTH (opoint, opoint_byte);
418
37bef230
RS
419 return find_start_value;
420}
421\f
f902a008
RS
422/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
423
424static int
425prev_char_comend_first (pos, pos_byte)
426 int pos, pos_byte;
427{
428 int c, val;
429
430 DEC_BOTH (pos, pos_byte);
431 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
432 c = FETCH_CHAR (pos_byte);
433 val = SYNTAX_COMEND_FIRST (c);
434 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
435 return val;
436}
437
438/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
439
3f679f55
SM
440/* static int
441 * prev_char_comstart_first (pos, pos_byte)
442 * int pos, pos_byte;
443 * {
444 * int c, val;
7d0393cf 445 *
3f679f55
SM
446 * DEC_BOTH (pos, pos_byte);
447 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
448 * c = FETCH_CHAR (pos_byte);
449 * val = SYNTAX_COMSTART_FIRST (c);
450 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
451 * return val;
452 * } */
f902a008 453
6a140a74
RS
454/* Checks whether charpos FROM is at the end of a comment.
455 FROM_BYTE is the bytepos corresponding to FROM.
456 Do not move back before STOP.
457
458 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
459 return -1 otherwise.
460
461 If successful, store the charpos of the comment's beginning
462 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
463
464 Global syntax data remains valid for backward search starting at
465 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
466
467static int
95ff8dfc 468back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
932e6895 469 EMACS_INT from, from_byte, stop;
95ff8dfc 470 int comnested, comstyle;
932e6895 471 EMACS_INT *charpos_ptr, *bytepos_ptr;
195d1361
RS
472{
473 /* Look back, counting the parity of string-quotes,
474 and recording the comment-starters seen.
475 When we reach a safe place, assume that's not in a string;
476 then step the main scan to the earliest comment-starter seen
477 an even number of string quotes away from the safe place.
478
479 OFROM[I] is position of the earliest comment-starter seen
480 which is I+2X quotes from the comment-end.
481 PARITY is current parity of quotes from the comment end. */
3ee5041c 482 int string_style = -1; /* Presumed outside of any string. */
195d1361 483 int string_lossage = 0;
f7c436c1 484 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 485 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
486 comment-starter we might see later could be a false positive (hidden
487 inside another comment).
488 Test case: { a (* b } c (* d *) */
489 int comment_lossage = 0;
8e2911c2
AS
490 EMACS_INT comment_end = from;
491 EMACS_INT comment_end_byte = from_byte;
492 EMACS_INT comstart_pos = 0;
493 EMACS_INT comstart_byte;
eb35b628
RS
494 /* Place where the containing defun starts,
495 or 0 if we didn't come across it yet. */
8e2911c2
AS
496 EMACS_INT defun_start = 0;
497 EMACS_INT defun_start_byte = 0;
195d1361 498 register enum syntaxcode code;
95ff8dfc 499 int nesting = 1; /* current comment nesting */
ea315ed6 500 int c;
3f679f55
SM
501 int syntax = 0;
502
503 /* FIXME: A }} comment-ender style leads to incorrect behavior
504 in the case of {{ c }}} because we ignore the last two chars which are
505 assumed to be comment-enders although they aren't. */
195d1361
RS
506
507 /* At beginning of range to scan, we're outside of strings;
508 that determines quote parity to the comment-end. */
509 while (from != stop)
510 {
3f679f55
SM
511 int temp_byte, prev_syntax;
512 int com2start, com2end;
6a140a74 513
195d1361 514 /* Move back and examine a character. */
6a140a74 515 DEC_BOTH (from, from_byte);
195d1361
RS
516 UPDATE_SYNTAX_TABLE_BACKWARD (from);
517
3f679f55 518 prev_syntax = syntax;
b7dbcc19 519 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 520 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
521 code = SYNTAX (c);
522
3f679f55
SM
523 /* Check for 2-char comment markers. */
524 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
525 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
526 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
527 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
528 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
529 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
530 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
531
532 /* Nasty cases with overlapping 2-char comment markers:
533 - snmp-mode: -- c -- foo -- c --
534 --- c --
535 ------ c --
536 - c-mode: *||*
537 |* *|* *|
538 |*| |* |*|
539 /// */
540
541 /* If a 2-char comment sequence partly overlaps with another,
542 we don't try to be clever. */
543 if (from > stop && (com2end || com2start))
195d1361 544 {
3f679f55
SM
545 int next = from, next_byte = from_byte, next_c, next_syntax;
546 DEC_BOTH (next, next_byte);
547 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 548 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55
SM
549 next_syntax = SYNTAX_WITH_FLAGS (next_c);
550 if (((com2start || comnested)
551 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
552 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
553 || ((com2end || comnested)
554 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
555 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
556 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
557 goto lossage;
558 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 559 }
3f679f55
SM
560
561 if (com2start && comstart_pos == 0)
562 /* We're looking at a comment starter. But it might be a comment
563 ender as well (see snmp-mode). The first time we see one, we
564 need to consider it as a comment starter,
565 and the subsequent times as a comment ender. */
566 com2end = 0;
567
568 /* Turn a 2-char comment sequences into the appropriate syntax. */
569 if (com2end)
570 code = Sendcomment;
571 else if (com2start)
572 code = Scomment;
573 /* Ignore comment starters of a different style. */
574 else if (code == Scomment
575 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
576 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 577 continue;
195d1361 578
9828a477
KH
579 /* Ignore escaped characters, except comment-enders. */
580 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
581 continue;
582
3ee5041c 583 switch (code)
195d1361 584 {
3ee5041c
SM
585 case Sstring_fence:
586 case Scomment_fence:
587 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
588 case Sstring:
589 /* Track parity of quotes. */
590 if (string_style == -1)
591 /* Entering a string. */
592 string_style = c;
593 else if (string_style == c)
594 /* Leaving the string. */
595 string_style = -1;
596 else
597 /* If we have two kinds of string delimiters.
598 There's no way to grok this scanning backwards. */
195d1361 599 string_lossage = 1;
3ee5041c 600 break;
7d0393cf 601
3ee5041c
SM
602 case Scomment:
603 /* We've already checked that it is the relevant comstyle. */
f7c436c1 604 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
605 /* There are odd string quotes involved, so let's be careful.
606 Test case in Pascal: " { " a { " } */
607 goto lossage;
608
f7c436c1
SM
609 if (!comnested)
610 {
611 /* Record best comment-starter so far. */
612 comstart_pos = from;
613 comstart_byte = from_byte;
614 }
615 else if (--nesting <= 0)
95ff8dfc
RS
616 /* nested comments have to be balanced, so we don't need to
617 keep looking for earlier ones. We use here the same (slightly
618 incorrect) reasoning as below: since it is followed by uniform
619 paired string quotes, this comment-start has to be outside of
620 strings, else the comment-end itself would be inside a string. */
621 goto done;
3ee5041c
SM
622 break;
623
02010917 624 case Sendcomment:
3f679f55 625 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 626 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 627 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
628 /* This is the same style of comment ender as ours. */
629 {
630 if (comnested)
631 nesting++;
632 else
633 /* Anything before that can't count because it would match
634 this comment-ender rather than ours. */
635 from = stop; /* Break out of the loop. */
636 }
f7c436c1
SM
637 else if (comstart_pos != 0 || c != '\n')
638 /* We're mixing comment styles here, so we'd better be careful.
639 The (comstart_pos != 0 || c != '\n') check is not quite correct
640 (we should just always set comment_lossage), but removing it
641 would imply that any multiline comment in C would go through
642 lossage, which seems overkill.
643 The failure should only happen in the rare cases such as
644 { (* } *) */
645 comment_lossage = 1;
02010917 646 break;
195d1361 647
02010917
SM
648 case Sopen:
649 /* Assume a defun-start point is outside of strings. */
650 if (open_paren_in_column_0_is_defun_start
651 && (from == stop
652 || (temp_byte = dec_bytepos (from_byte),
653 FETCH_CHAR (temp_byte) == '\n')))
654 {
655 defun_start = from;
656 defun_start_byte = from_byte;
657 from = stop; /* Break out of the loop. */
658 }
eb35b628 659 break;
02010917
SM
660
661 default:
f7c436c1 662 break;
eb35b628 663 }
195d1361
RS
664 }
665
666 if (comstart_pos == 0)
667 {
668 from = comment_end;
6a140a74 669 from_byte = comment_end_byte;
195d1361
RS
670 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
671 }
f7c436c1
SM
672 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
673 or `done'), then we've found the beginning of the non-nested comment. */
674 else if (1) /* !comnested */
195d1361
RS
675 {
676 from = comstart_pos;
6a140a74 677 from_byte = comstart_byte;
789f3320 678 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
679 }
680 else
681 {
3ee5041c
SM
682 struct lisp_parse_state state;
683 lossage:
195d1361
RS
684 /* We had two kinds of string delimiters mixed up
685 together. Decode this going forwards.
02010917 686 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
687 to the one in question; this records where we
688 last passed a comment starter. */
eb35b628
RS
689 /* If we did not already find the defun start, find it now. */
690 if (defun_start == 0)
691 {
692 defun_start = find_defun_start (comment_end, comment_end_byte);
693 defun_start_byte = find_start_value_byte;
694 }
02010917 695 do
195d1361 696 {
02010917
SM
697 scan_sexps_forward (&state,
698 defun_start, defun_start_byte,
699 comment_end, -10000, 0, Qnil, 0);
700 defun_start = comment_end;
701 if (state.incomment == (comnested ? 1 : -1)
702 && state.comstyle == comstyle)
703 from = state.comstr_start;
704 else
705 {
706 from = comment_end;
707 if (state.incomment)
708 /* If comment_end is inside some other comment, maybe ours
709 is nested, so we need to try again from within the
710 surrounding comment. Example: { a (* " *) */
711 {
712 /* FIXME: We should advance by one or two chars. */
713 defun_start = state.comstr_start + 2;
714 defun_start_byte = CHAR_TO_BYTE (defun_start);
715 }
716 }
717 } while (defun_start < comment_end);
718
6a140a74 719 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
720 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
721 }
7d0393cf 722
95ff8dfc 723 done:
6a140a74
RS
724 *charpos_ptr = from;
725 *bytepos_ptr = from_byte;
726
1aa963c8 727 return (from == comment_end) ? -1 : from;
195d1361
RS
728}
729\f
8489eb67 730DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
731 doc: /* Return t if OBJECT is a syntax table.
732Currently, any char-table counts as a syntax table. */)
733 (object)
2203e1e8 734 Lisp_Object object;
8489eb67 735{
2203e1e8 736 if (CHAR_TABLE_P (object)
e704cb4b 737 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
738 return Qt;
739 return Qnil;
740}
741
8ea151b2 742static void
8489eb67
RS
743check_syntax_table (obj)
744 Lisp_Object obj;
745{
47f5f6ae
KS
746 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
747 Qsyntax_table_p, obj);
7d0393cf 748}
8489eb67 749
8489eb67 750DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
751 doc: /* Return the current syntax table.
752This is the one specified by the current buffer. */)
753 ()
8489eb67
RS
754{
755 return current_buffer->syntax_table;
756}
757
758DEFUN ("standard-syntax-table", Fstandard_syntax_table,
759 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
760 doc: /* Return the standard syntax table.
761This is the one used for new buffers. */)
762 ()
8489eb67
RS
763{
764 return Vstandard_syntax_table;
765}
766
767DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
768 doc: /* Construct a new syntax table and return it.
769It is a copy of the TABLE, which defaults to the standard syntax table. */)
770 (table)
8489eb67
RS
771 Lisp_Object table;
772{
8ea151b2
RS
773 Lisp_Object copy;
774
265a9e55 775 if (!NILP (table))
8ea151b2
RS
776 check_syntax_table (table);
777 else
778 table = Vstandard_syntax_table;
779
780 copy = Fcopy_sequence (table);
0f867324
RS
781
782 /* Only the standard syntax table should have a default element.
783 Other syntax tables should inherit from parents instead. */
784 XCHAR_TABLE (copy)->defalt = Qnil;
785
786 /* Copied syntax tables should all have parents.
787 If we copied one with no parent, such as the standard syntax table,
788 use the standard syntax table as the copy's parent. */
789 if (NILP (XCHAR_TABLE (copy)->parent))
790 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 791 return copy;
8489eb67
RS
792}
793
794DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
795 doc: /* Select a new syntax table for the current buffer.
796One argument, a syntax table. */)
797 (table)
8489eb67
RS
798 Lisp_Object table;
799{
4e4dfa78 800 int idx;
8ea151b2 801 check_syntax_table (table);
8489eb67
RS
802 current_buffer->syntax_table = table;
803 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
804 idx = PER_BUFFER_VAR_IDX (syntax_table);
805 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
806 return table;
807}
808\f
809/* Convert a letter which signifies a syntax code
810 into the code it signifies.
195d1361 811 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
812
813unsigned char syntax_spec_code[0400] =
814 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
815 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 818 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
819 (char) Smath, 0377, 0377, (char) Squote,
820 (char) Sopen, (char) Sclose, 0377, 0377,
821 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
823 0377, 0377, 0377, 0377,
824 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 825 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
826 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
827 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
828 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
829 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
830 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
831 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 832 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
833 };
834
195d1361 835/* Indexed by syntax code, give the letter that describes it. */
8489eb67 836
195d1361 837char syntax_code_spec[16] =
8489eb67 838 {
195d1361
RS
839 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
840 '!', '|'
8489eb67 841 };
93da5fff
KH
842
843/* Indexed by syntax code, give the object (cons of syntax code and
844 nil) to be stored in syntax table. Since these objects can be
845 shared among syntax tables, we generate them in advance. By
846 sharing objects, the function `describe-syntax' can give a more
847 compact listing. */
848static Lisp_Object Vsyntax_code_object;
849
8489eb67
RS
850\f
851DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93 852 doc: /* Return the syntax code of CHARACTER, described by a character.
e2b6daf4
CY
853For example, if CHARACTER is a word constituent, the
854character `w' (119) is returned.
fdb82f93
PJ
855The characters that correspond to various syntax codes
856are listed in the documentation of `modify-syntax-entry'. */)
857 (character)
2203e1e8 858 Lisp_Object character;
8489eb67 859{
8ea151b2 860 int char_int;
774b9a60 861 CHECK_CHARACTER (character);
2203e1e8 862 char_int = XINT (character);
d48cd3f4 863 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 864 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
865}
866
867DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
868 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
869 (character)
2203e1e8 870 Lisp_Object character;
beefa22e 871{
8ea151b2 872 int char_int, code;
b7826503 873 CHECK_NUMBER (character);
2203e1e8 874 char_int = XINT (character);
d48cd3f4 875 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 876 code = SYNTAX (char_int);
a8bd7cd8 877 if (code == Sopen || code == Sclose)
2e34157c 878 return SYNTAX_MATCH (char_int);
beefa22e 879 return Qnil;
8489eb67
RS
880}
881
c65adb44 882DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
883 doc: /* Convert a syntax specification STRING into syntax cell form.
884STRING should be a string as it is allowed as argument of
885`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 886\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93
PJ
887text property. */)
888 (string)
c65adb44 889 Lisp_Object string;
8489eb67 890{
2e567bd3 891 register const unsigned char *p;
8489eb67 892 register enum syntaxcode code;
247e20a8 893 int val;
8ea151b2 894 Lisp_Object match;
8489eb67 895
b7826503 896 CHECK_STRING (string);
8489eb67 897
d5db4077 898 p = SDATA (string);
8489eb67
RS
899 code = (enum syntaxcode) syntax_spec_code[*p++];
900 if (((int) code & 0377) == 0377)
b764a653 901 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 902
8ea151b2 903 if (code == Sinherit)
c65adb44 904 return Qnil;
8ea151b2
RS
905
906 if (*p)
d1be9f0f 907 {
93da5fff 908 int len;
62a6e103 909 int character = STRING_CHAR_AND_LENGTH (p, len);
93da5fff 910 XSETINT (match, character);
d1be9f0f
RS
911 if (XFASTINT (match) == ' ')
912 match = Qnil;
93da5fff 913 p += len;
d1be9f0f
RS
914 }
915 else
8ea151b2 916 match = Qnil;
8489eb67 917
8ea151b2 918 val = (int) code;
8489eb67
RS
919 while (*p)
920 switch (*p++)
921 {
922 case '1':
247e20a8 923 val |= 1 << 16;
8489eb67
RS
924 break;
925
926 case '2':
247e20a8 927 val |= 1 << 17;
8489eb67
RS
928 break;
929
930 case '3':
247e20a8 931 val |= 1 << 18;
8489eb67
RS
932 break;
933
934 case '4':
247e20a8 935 val |= 1 << 19;
8489eb67
RS
936 break;
937
938 case 'p':
247e20a8 939 val |= 1 << 20;
8489eb67 940 break;
e5d4f4dc
RS
941
942 case 'b':
247e20a8 943 val |= 1 << 21;
e5d4f4dc 944 break;
95ff8dfc
RS
945
946 case 'n':
947 val |= 1 << 22;
948 break;
8489eb67 949 }
7d0393cf 950
93da5fff 951 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 952 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
953 else
954 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
955 return Fcons (make_number (val), match);
956}
957
fdb82f93 958/* I really don't know why this is interactive
2c9e1900 959 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 960DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 961 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 962 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 963The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 964 the current buffer's syntax table.
5c7b02ab 965CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
fedc6ab5 966in the range MIN to MAX are changed.
fdb82f93
PJ
967The first character of NEWENTRY should be one of the following:
968 Space or - whitespace syntax. w word constituent.
969 _ symbol constituent. . punctuation.
970 ( open-parenthesis. ) close-parenthesis.
971 " string quote. \\ escape.
972 $ paired delimiter. ' expression quote or prefix operator.
973 < comment starter. > comment ender.
974 / character-quote. @ inherit from `standard-syntax-table'.
975 | generic string fence. ! generic comment fence.
976
977Only single-character comment start and end sequences are represented thus.
978Two-character sequences are represented as described below.
979The second character of NEWENTRY is the matching parenthesis,
980 used only if the first character is `(' or `)'.
981Any additional characters are flags.
982Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
983 1 means CHAR is the start of a two-char comment start sequence.
984 2 means CHAR is the second character of such a sequence.
985 3 means CHAR is the start of a two-char comment end sequence.
986 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
987
988There can be up to two orthogonal comment sequences. This is to support
989language modes such as C++. By default, all comment sequences are of style
990a, but you can set the comment sequence style to b (on the second character
991of a comment-start, or the first character of a comment-end sequence) using
992this flag:
a50a10a0
PJ
993 b means CHAR is part of comment sequence b.
994 n means CHAR is part of a nestable comment sequence.
fdb82f93 995
a50a10a0 996 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 997 such characters are treated as whitespace when they occur
a50a10a0 998 between expressions.
fedc6ab5 999usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1000 (c, newentry, syntax_table)
c65adb44
SM
1001 Lisp_Object c, newentry, syntax_table;
1002{
5c7b02ab
KH
1003 if (CONSP (c))
1004 {
8f924df7
KH
1005 CHECK_CHARACTER_CAR (c);
1006 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
1007 }
1008 else
1009 CHECK_CHARACTER (c);
c65adb44
SM
1010
1011 if (NILP (syntax_table))
1012 syntax_table = current_buffer->syntax_table;
1013 else
1014 check_syntax_table (syntax_table);
8489eb67 1015
5c7b02ab
KH
1016 newentry = Fstring_to_syntax (newentry);
1017 if (CONSP (c))
1018 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1019 else
1020 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1021
1022 /* We clear the regexp cache, since character classes can now have
1023 different values from those in the compiled regexps.*/
1024 clear_regexp_cache ();
1025
8489eb67
RS
1026 return Qnil;
1027}
1028\f
1029/* Dump syntax table to buffer in human-readable format */
1030
62abe9cb
SM
1031DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1032 Sinternal_describe_syntax_value, 1, 1, 0,
1033 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1034 (syntax)
1035 Lisp_Object syntax;
8489eb67
RS
1036{
1037 register enum syntaxcode code;
e6365855 1038 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1039 char str[2];
62abe9cb 1040 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1041
8ea151b2
RS
1042 if (NILP (value))
1043 {
62abe9cb
SM
1044 insert_string ("default");
1045 return syntax;
8ea151b2
RS
1046 }
1047
908b7fea
KH
1048 if (CHAR_TABLE_P (value))
1049 {
62abe9cb
SM
1050 insert_string ("deeper char-table ...");
1051 return syntax;
908b7fea
KH
1052 }
1053
8ea151b2
RS
1054 if (!CONSP (value))
1055 {
62abe9cb
SM
1056 insert_string ("invalid");
1057 return syntax;
8ea151b2
RS
1058 }
1059
c1d497be
KR
1060 first = XCAR (value);
1061 match_lisp = XCDR (value);
8ea151b2
RS
1062
1063 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1064 {
62abe9cb
SM
1065 insert_string ("invalid");
1066 return syntax;
8489eb67
RS
1067 }
1068
e704cb4b 1069 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1070 start1 = (XINT (first) >> 16) & 1;
1071 start2 = (XINT (first) >> 17) & 1;
1072 end1 = (XINT (first) >> 18) & 1;
1073 end2 = (XINT (first) >> 19) & 1;
1074 prefix = (XINT (first) >> 20) & 1;
1075 comstyle = (XINT (first) >> 21) & 1;
e6365855 1076 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1077
1078 if ((int) code < 0 || (int) code >= (int) Smax)
1079 {
1080 insert_string ("invalid");
62abe9cb 1081 return syntax;
8489eb67
RS
1082 }
1083 desc = syntax_code_spec[(int) code];
1084
1085 str[0] = desc, str[1] = 0;
1086 insert (str, 1);
1087
93da5fff
KH
1088 if (NILP (match_lisp))
1089 insert (" ", 1);
1090 else
1091 insert_char (XINT (match_lisp));
8489eb67 1092
8489eb67
RS
1093 if (start1)
1094 insert ("1", 1);
1095 if (start2)
1096 insert ("2", 1);
1097
1098 if (end1)
1099 insert ("3", 1);
1100 if (end2)
1101 insert ("4", 1);
1102
1103 if (prefix)
1104 insert ("p", 1);
e5d4f4dc
RS
1105 if (comstyle)
1106 insert ("b", 1);
e6365855
SM
1107 if (comnested)
1108 insert ("n", 1);
8489eb67
RS
1109
1110 insert_string ("\twhich means: ");
1111
0220c518 1112 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1113 {
1114 case Swhitespace:
1115 insert_string ("whitespace"); break;
1116 case Spunct:
1117 insert_string ("punctuation"); break;
1118 case Sword:
1119 insert_string ("word"); break;
1120 case Ssymbol:
1121 insert_string ("symbol"); break;
1122 case Sopen:
1123 insert_string ("open"); break;
1124 case Sclose:
1125 insert_string ("close"); break;
1126 case Squote:
d671382e 1127 insert_string ("prefix"); break;
8489eb67
RS
1128 case Sstring:
1129 insert_string ("string"); break;
1130 case Smath:
1131 insert_string ("math"); break;
1132 case Sescape:
1133 insert_string ("escape"); break;
1134 case Scharquote:
1135 insert_string ("charquote"); break;
1136 case Scomment:
1137 insert_string ("comment"); break;
1138 case Sendcomment:
1139 insert_string ("endcomment"); break;
d671382e
SM
1140 case Sinherit:
1141 insert_string ("inherit"); break;
1142 case Scomment_fence:
1143 insert_string ("comment fence"); break;
1144 case Sstring_fence:
1145 insert_string ("string fence"); break;
8489eb67
RS
1146 default:
1147 insert_string ("invalid");
62abe9cb 1148 return syntax;
8489eb67
RS
1149 }
1150
8ea151b2 1151 if (!NILP (match_lisp))
8489eb67
RS
1152 {
1153 insert_string (", matches ");
8ea151b2 1154 insert_char (XINT (match_lisp));
8489eb67
RS
1155 }
1156
1157 if (start1)
1158 insert_string (",\n\t is the first character of a comment-start sequence");
1159 if (start2)
1160 insert_string (",\n\t is the second character of a comment-start sequence");
1161
1162 if (end1)
1163 insert_string (",\n\t is the first character of a comment-end sequence");
1164 if (end2)
1165 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1166 if (comstyle)
1167 insert_string (" (comment style b)");
e6365855
SM
1168 if (comnested)
1169 insert_string (" (nestable)");
e5d4f4dc 1170
8489eb67
RS
1171 if (prefix)
1172 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1173
62abe9cb
SM
1174 return syntax;
1175}
8489eb67 1176\f
195d1361
RS
1177int parse_sexp_ignore_comments;
1178
8f924df7
KH
1179/* Char-table of functions that find the next or previous word
1180 boundary. */
1181Lisp_Object Vfind_word_boundary_function_table;
869bb237 1182
8489eb67
RS
1183/* Return the position across COUNT words from FROM.
1184 If that many words cannot be found before the end of the buffer, return 0.
1185 COUNT negative means scan backward and stop at word beginning. */
1186
dfcf069d 1187int
8489eb67
RS
1188scan_words (from, count)
1189 register int from, count;
1190{
1191 register int beg = BEGV;
1192 register int end = ZV;
6a140a74 1193 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1194 register enum syntaxcode code;
1195 int ch0, ch1;
8f924df7 1196 Lisp_Object func, script, pos;
8489eb67
RS
1197
1198 immediate_quit = 1;
1199 QUIT;
1200
195d1361
RS
1201 SETUP_SYNTAX_TABLE (from, count);
1202
8489eb67
RS
1203 while (count > 0)
1204 {
1205 while (1)
1206 {
1207 if (from == end)
1208 {
1209 immediate_quit = 0;
1210 return 0;
1211 }
195d1361 1212 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1213 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1214 code = SYNTAX (ch0);
6a140a74 1215 INC_BOTH (from, from_byte);
8489eb67
RS
1216 if (words_include_escapes
1217 && (code == Sescape || code == Scharquote))
1218 break;
1219 if (code == Sword)
1220 break;
8489eb67 1221 }
93da5fff
KH
1222 /* Now CH0 is a character which begins a word and FROM is the
1223 position of the next character. */
8f924df7 1224 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1225 if (! NILP (Ffboundp (func)))
8489eb67 1226 {
869bb237 1227 pos = call2 (func, make_number (from - 1), make_number (end));
8f924df7
KH
1228 if (INTEGERP (pos) && XINT (pos) > from)
1229 {
1230 from = XINT (pos);
1231 from_byte = CHAR_TO_BYTE (from);
1232 }
8489eb67 1233 }
869bb237 1234 else
8cb8232a 1235 {
8cb8232a
KH
1236 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1237 while (1)
1238 {
1239 if (from == end) break;
1240 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1241 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1242 code = SYNTAX (ch1);
1243 if ((code != Sword
1244 && (! words_include_escapes
1245 || (code != Sescape && code != Scharquote)))
2b70a11c 1246 || word_boundary_p (ch0, ch1))
869bb237 1247 break;
8cb8232a
KH
1248 INC_BOTH (from, from_byte);
1249 ch0 = ch1;
1250 }
8489eb67
RS
1251 }
1252 count--;
1253 }
1254 while (count < 0)
1255 {
1256 while (1)
1257 {
1258 if (from == beg)
1259 {
1260 immediate_quit = 0;
1261 return 0;
1262 }
6a140a74 1263 DEC_BOTH (from, from_byte);
195d1361 1264 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1265 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1266 code = SYNTAX (ch1);
8489eb67
RS
1267 if (words_include_escapes
1268 && (code == Sescape || code == Scharquote))
1269 break;
1270 if (code == Sword)
1271 break;
8489eb67 1272 }
93da5fff
KH
1273 /* Now CH1 is a character which ends a word and FROM is the
1274 position of it. */
8f924df7 1275 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1276 if (! NILP (Ffboundp (func)))
8f924df7 1277 {
869bb237 1278 pos = call2 (func, make_number (from), make_number (beg));
8f924df7
KH
1279 if (INTEGERP (pos) && XINT (pos) < from)
1280 {
1281 from = XINT (pos);
1282 from_byte = CHAR_TO_BYTE (from);
1283 }
8489eb67 1284 }
869bb237 1285 else
8489eb67 1286 {
8cb8232a
KH
1287 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1288 while (1)
1289 {
8cb8232a
KH
1290 if (from == beg)
1291 break;
262be72a 1292 DEC_BOTH (from, from_byte);
8cb8232a 1293 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1294 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1295 code = SYNTAX (ch0);
1296 if ((code != Sword
1297 && (! words_include_escapes
1298 || (code != Sescape && code != Scharquote)))
2b70a11c 1299 || word_boundary_p (ch0, ch1))
262be72a
MB
1300 {
1301 INC_BOTH (from, from_byte);
1302 break;
1303 }
8cb8232a
KH
1304 ch1 = ch0;
1305 }
8489eb67
RS
1306 }
1307 count++;
1308 }
1309
1310 immediate_quit = 0;
1311
1312 return from;
1313}
1314
1e9dbb5f 1315DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93
PJ
1316 doc: /* Move point forward ARG words (backward if ARG is negative).
1317Normally returns t.
1318If an edge of the buffer or a field boundary is reached, point is left there
1319and the function returns nil. Field boundaries are not noticed if
1320`inhibit-field-text-motion' is non-nil. */)
839966f3
KH
1321 (arg)
1322 Lisp_Object arg;
8489eb67 1323{
c096ae4d 1324 Lisp_Object tmp;
2c6ea900 1325 int orig_val, val;
8489eb67 1326
839966f3
KH
1327 if (NILP (arg))
1328 XSETFASTINT (arg, 1);
1329 else
1330 CHECK_NUMBER (arg);
1331
1332 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1333 if (! orig_val)
839966f3 1334 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1335
b8855607 1336 /* Avoid jumping out of an input field. */
c096ae4d
SM
1337 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1338 Qt, Qnil, Qnil);
1339 val = XFASTINT (tmp);
7d0393cf 1340
8489eb67 1341 SET_PT (val);
e6d8341f 1342 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1343}
1344\f
195d1361
RS
1345Lisp_Object skip_chars ();
1346
1347DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1348 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1349STRING is like the inside of a `[...]' in a regular expression
1350except that `]' is never special and `\\' quotes `^', `-' or `\\'
7087d5e9 1351 (but not at the end of a range; quoting is never needed there).
fdb82f93
PJ
1352Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1353With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1354Char classes, e.g. `[:alpha:]', are supported.
1355
1356Returns the distance traveled, either zero or positive. */)
fdb82f93 1357 (string, lim)
195d1361
RS
1358 Lisp_Object string, lim;
1359{
327719ee 1360 return skip_chars (1, string, lim, 1);
195d1361
RS
1361}
1362
1363DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1364 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1365See `skip-chars-forward' for details.
1366Returns the distance traveled, either zero or negative. */)
1367 (string, lim)
195d1361
RS
1368 Lisp_Object string, lim;
1369{
327719ee 1370 return skip_chars (0, string, lim, 1);
195d1361
RS
1371}
1372
1373DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1374 doc: /* Move point forward across chars in specified syntax classes.
1375SYNTAX is a string of syntax code characters.
1376Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1377If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1378This function returns the distance traveled, either zero or positive. */)
1379 (syntax, lim)
195d1361
RS
1380 Lisp_Object syntax, lim;
1381{
b7dbcc19 1382 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1383}
1384
1385DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1386 doc: /* Move point backward across chars in specified syntax classes.
1387SYNTAX is a string of syntax code characters.
1388Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1389If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1390This function returns the distance traveled, either zero or negative. */)
1391 (syntax, lim)
195d1361
RS
1392 Lisp_Object syntax, lim;
1393{
b7dbcc19 1394 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1395}
1396
6a140a74 1397static Lisp_Object
327719ee 1398skip_chars (forwardp, string, lim, handle_iso_classes)
b7dbcc19 1399 int forwardp;
195d1361 1400 Lisp_Object string, lim;
a1bc88d4 1401 int handle_iso_classes;
195d1361 1402{
195d1361
RS
1403 register unsigned int c;
1404 unsigned char fastmap[0400];
b7dbcc19 1405 /* Store the ranges of non-ASCII characters. */
5df4982e 1406 int *char_ranges;
7e68b0ea 1407 int n_char_ranges = 0;
195d1361 1408 int negate = 0;
4101e6fe 1409 register int i, i_byte;
b7dbcc19
KH
1410 /* Set to 1 if the current buffer is multibyte and the region
1411 contains non-ASCII chars. */
1412 int multibyte;
1413 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1414 chars. */
1674d9a2
RS
1415 int string_multibyte;
1416 int size_byte;
2e567bd3 1417 const unsigned char *str;
82d497fc 1418 int len;
a1bc88d4 1419 Lisp_Object iso_classes;
195d1361 1420
b7826503 1421 CHECK_STRING (string);
a1bc88d4 1422 iso_classes = Qnil;
82d497fc 1423
195d1361
RS
1424 if (NILP (lim))
1425 XSETINT (lim, forwardp ? ZV : BEGV);
1426 else
b7826503 1427 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1428
1429 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1430 if (XINT (lim) > ZV)
1431 XSETFASTINT (lim, ZV);
1432 if (XINT (lim) < BEGV)
1433 XSETFASTINT (lim, BEGV);
1434
b7dbcc19 1435 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
92eaa22e 1436 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1437 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1438
195d1361
RS
1439 bzero (fastmap, sizeof fastmap);
1440
8f924df7
KH
1441 str = SDATA (string);
1442 size_byte = SBYTES (string);
4101e6fe 1443
82d497fc 1444 i_byte = 0;
13090112 1445 if (i_byte < size_byte
d5db4077 1446 && SREF (string, 0) == '^')
195d1361 1447 {
82d497fc 1448 negate = 1; i_byte++;
195d1361
RS
1449 }
1450
1451 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1452 Handle backslashes and ranges specially.
195d1361 1453
b7dbcc19
KH
1454 If STRING contains non-ASCII characters, setup char_ranges for
1455 them and use fastmap only for their leading codes. */
4101e6fe 1456
b7dbcc19 1457 if (! string_multibyte)
195d1361 1458 {
b7dbcc19 1459 int string_has_eight_bit = 0;
4101e6fe 1460
b7dbcc19
KH
1461 /* At first setup fastmap. */
1462 while (i_byte < size_byte)
1463 {
1464 c = str[i_byte++];
1465
a1bc88d4
RS
1466 if (handle_iso_classes && c == '['
1467 && i_byte < size_byte
327719ee 1468 && str[i_byte] == ':')
a1bc88d4
RS
1469 {
1470 const unsigned char *class_beg = str + i_byte + 1;
1471 const unsigned char *class_end = class_beg;
b3bda4fd 1472 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1473 /* Leave room for the null. */
a1bc88d4
RS
1474 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1475 re_wctype_t cc;
1476
1477 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1478 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1479
b3bda4fd
KS
1480 while (class_end < class_limit
1481 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1482 class_end++;
1483
b3bda4fd
KS
1484 if (class_end == class_beg
1485 || *class_end != ':' || class_end[1] != ']')
1486 goto not_a_class_name;
a1bc88d4
RS
1487
1488 bcopy (class_beg, class_name, class_end - class_beg);
1489 class_name[class_end - class_beg] = 0;
1490
1491 cc = re_wctype (class_name);
1492 if (cc == 0)
1493 error ("Invalid ISO C character class");
1494
1495 iso_classes = Fcons (make_number (cc), iso_classes);
1496
1497 i_byte = class_end + 2 - str;
1498 continue;
1499 }
1500
b3bda4fd 1501 not_a_class_name:
b7dbcc19
KH
1502 if (c == '\\')
1503 {
13090112 1504 if (i_byte == size_byte)
4101e6fe
RS
1505 break;
1506
5c7b02ab 1507 c = str[i_byte++];
195d1361 1508 }
839966f3
KH
1509 /* Treat `-' as range character only if another character
1510 follows. */
1511 if (i_byte + 1 < size_byte
82d497fc 1512 && str[i_byte] == '-')
195d1361 1513 {
9690d026 1514 unsigned int c2;
4101e6fe
RS
1515
1516 /* Skip over the dash. */
82d497fc 1517 i_byte++;
4101e6fe 1518
4101e6fe 1519 /* Get the end of the range. */
5c7b02ab 1520 c2 = str[i_byte++];
b7dbcc19
KH
1521 if (c2 == '\\'
1522 && i_byte < size_byte)
1523 c2 = str[i_byte++];
1524
8f924df7
KH
1525 if (c <= c2)
1526 {
1527 while (c <= c2)
1528 fastmap[c++] = 1;
1529 if (! ASCII_CHAR_P (c2))
1530 string_has_eight_bit = 1;
1531 }
195d1361
RS
1532 }
1533 else
b7dbcc19
KH
1534 {
1535 fastmap[c] = 1;
1536 if (! ASCII_CHAR_P (c))
1537 string_has_eight_bit = 1;
1538 }
1539 }
1540
1541 /* If the current range is multibyte and STRING contains
1542 eight-bit chars, arrange fastmap and setup char_ranges for
1543 the corresponding multibyte chars. */
1544 if (multibyte && string_has_eight_bit)
1545 {
1546 unsigned char fastmap2[0400];
1547 int range_start_byte, range_start_char;
1548
1549 bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
1550 bzero (fastmap + 0200, 0200);
1551 /* We are sure that this loop stops. */
1552 for (i = 0200; ! fastmap2[i]; i++);
4c0354d7 1553 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1554 fastmap[CHAR_LEADING_CODE (c)] = 1;
1555 range_start_byte = i;
1556 range_start_char = c;
f003ca54 1557 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
b7dbcc19
KH
1558 for (i = 129; i < 0400; i++)
1559 {
4c0354d7 1560 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1561 fastmap[CHAR_LEADING_CODE (c)] = 1;
1562 if (i - range_start_byte != c - range_start_char)
1563 {
1564 char_ranges[n_char_ranges++] = range_start_char;
1565 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1566 + range_start_char);
1567 range_start_byte = i;
1568 range_start_char = c;
8f924df7 1569 }
b7dbcc19
KH
1570 }
1571 char_ranges[n_char_ranges++] = range_start_char;
1572 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1573 + range_start_char);
195d1361
RS
1574 }
1575 }
f003ca54 1576 else /* STRING is multibyte */
b7dbcc19 1577 {
f003ca54
KH
1578 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1579
b7dbcc19 1580 while (i_byte < size_byte)
195d1361 1581 {
b7dbcc19 1582 unsigned char leading_code;
5c7b02ab 1583
b7dbcc19 1584 leading_code = str[i_byte];
62a6e103 1585 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19 1586 i_byte += len;
5c7b02ab 1587
327719ee
MB
1588 if (handle_iso_classes && c == '['
1589 && i_byte < size_byte
62a6e103 1590 && STRING_CHAR (str + i_byte) == ':')
327719ee
MB
1591 {
1592 const unsigned char *class_beg = str + i_byte + 1;
1593 const unsigned char *class_end = class_beg;
1594 const unsigned char *class_limit = str + size_byte - 2;
1595 /* Leave room for the null. */
1596 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1597 re_wctype_t cc;
1598
1599 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1600 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1601
1602 while (class_end < class_limit
1603 && *class_end >= 'a' && *class_end <= 'z')
1604 class_end++;
1605
1606 if (class_end == class_beg
1607 || *class_end != ':' || class_end[1] != ']')
1608 goto not_a_class_name_multibyte;
1609
1610 bcopy (class_beg, class_name, class_end - class_beg);
1611 class_name[class_end - class_beg] = 0;
1612
1613 cc = re_wctype (class_name);
1614 if (cc == 0)
1615 error ("Invalid ISO C character class");
1616
1617 iso_classes = Fcons (make_number (cc), iso_classes);
1618
1619 i_byte = class_end + 2 - str;
1620 continue;
1621 }
1622
1623 not_a_class_name_multibyte:
195d1361
RS
1624 if (c == '\\')
1625 {
13090112 1626 if (i_byte == size_byte)
4101e6fe
RS
1627 break;
1628
b7dbcc19 1629 leading_code = str[i_byte];
62a6e103 1630 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1631 i_byte += len;
195d1361 1632 }
839966f3
KH
1633 /* Treat `-' as range character only if another character
1634 follows. */
1635 if (i_byte + 1 < size_byte
82d497fc 1636 && str[i_byte] == '-')
195d1361 1637 {
9690d026 1638 unsigned int c2;
b7dbcc19 1639 unsigned char leading_code2;
4101e6fe
RS
1640
1641 /* Skip over the dash. */
82d497fc 1642 i_byte++;
4101e6fe 1643
4101e6fe 1644 /* Get the end of the range. */
b7dbcc19 1645 leading_code2 = str[i_byte];
62a6e103 1646 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1647 i_byte += len;
7e68b0ea 1648
b7dbcc19
KH
1649 if (c2 == '\\'
1650 && i_byte < size_byte)
1651 {
1652 leading_code2 = str[i_byte];
62a6e103 1653 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19
KH
1654 i_byte += len;
1655 }
1656
f003ca54
KH
1657 if (c > c2)
1658 continue;
b7dbcc19 1659 if (ASCII_CHAR_P (c))
06274af5 1660 {
5c7b02ab
KH
1661 while (c <= c2 && c < 0x80)
1662 fastmap[c++] = 1;
b7dbcc19
KH
1663 leading_code = CHAR_LEADING_CODE (c);
1664 }
1665 if (! ASCII_CHAR_P (c))
1666 {
1667 while (leading_code <= leading_code2)
1668 fastmap[leading_code++] = 1;
1669 if (c <= c2)
e39091b6 1670 {
b7dbcc19 1671 char_ranges[n_char_ranges++] = c;
e39091b6 1672 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1673 }
1674 }
195d1361
RS
1675 }
1676 else
7e68b0ea 1677 {
b7dbcc19 1678 if (ASCII_CHAR_P (c))
e39091b6
KH
1679 fastmap[c] = 1;
1680 else
7e68b0ea 1681 {
b7dbcc19 1682 fastmap[leading_code] = 1;
4101e6fe
RS
1683 char_ranges[n_char_ranges++] = c;
1684 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1685 }
1686 }
195d1361 1687 }
b7dbcc19
KH
1688
1689 /* If the current range is unibyte and STRING contains non-ASCII
1690 chars, arrange fastmap for the corresponding unibyte
1691 chars. */
1692
1693 if (! multibyte && n_char_ranges > 0)
1694 {
1695 bzero (fastmap + 0200, 0200);
1696 for (i = 0; i < n_char_ranges; i += 2)
1697 {
1698 int c1 = char_ranges[i];
1699 int c2 = char_ranges[i + 1];
1700
1701 for (; c1 <= c2; c1++)
2afc21f5
SM
1702 {
1703 int b = CHAR_TO_BYTE_SAFE (c1);
1704 if (b >= 0)
1705 fastmap[b] = 1;
1706 }
b7dbcc19
KH
1707 }
1708 }
195d1361
RS
1709 }
1710
9690d026 1711 /* If ^ was the first character, complement the fastmap. */
195d1361 1712 if (negate)
b7dbcc19
KH
1713 {
1714 if (! multibyte)
1715 for (i = 0; i < sizeof fastmap; i++)
1716 fastmap[i] ^= 1;
1717 else
1718 {
1719 for (i = 0; i < 0200; i++)
1720 fastmap[i] ^= 1;
1721 /* All non-ASCII chars possibly match. */
1722 for (; i < sizeof fastmap; i++)
1723 fastmap[i] = 1;
1724 }
1725 }
195d1361
RS
1726
1727 {
1728 int start_point = PT;
1729 int pos = PT;
6a140a74 1730 int pos_byte = PT_BYTE;
9af7511a
KH
1731 unsigned char *p = PT_ADDR, *endp, *stop;
1732
1733 if (forwardp)
1734 {
1735 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1736 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1737 }
1738 else
1739 {
1740 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1741 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1742 }
195d1361
RS
1743
1744 immediate_quit = 1;
618db430
SM
1745 /* This code may look up syntax tables using macros that rely on the
1746 gl_state object. To make sure this object is not out of date,
1747 let's initialize it manually.
1748 We ignore syntax-table text-properties for now, since that's
1749 what we've done in the past. */
d48cd3f4 1750 SETUP_BUFFER_SYNTAX_TABLE ();
b7dbcc19 1751 if (forwardp)
195d1361 1752 {
b7dbcc19 1753 if (multibyte)
8f924df7 1754 while (1)
b7dbcc19 1755 {
8f924df7 1756 int nbytes;
9af7511a 1757
8f924df7 1758 if (p >= stop)
9af7511a 1759 {
8f924df7 1760 if (p >= endp)
9af7511a 1761 break;
8f924df7
KH
1762 p = GAP_END_ADDR;
1763 stop = endp;
9af7511a 1764 }
62a6e103 1765 c = STRING_CHAR_AND_LENGTH (p, nbytes);
327719ee 1766 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1767 {
327719ee 1768 if (negate)
9af7511a 1769 break;
327719ee
MB
1770 else
1771 goto fwd_ok;
9af7511a 1772 }
9af7511a 1773
8f924df7 1774 if (! fastmap[*p])
b7dbcc19
KH
1775 break;
1776 if (! ASCII_CHAR_P (c))
9af7511a 1777 {
b7dbcc19
KH
1778 /* As we are looking at a multibyte character, we
1779 must look up the character in the table
1780 CHAR_RANGES. If there's no data in the table,
1781 that character is not what we want to skip. */
1782
1783 /* The following code do the right thing even if
1784 n_char_ranges is zero (i.e. no data in
1785 CHAR_RANGES). */
1786 for (i = 0; i < n_char_ranges; i += 2)
1787 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1788 break;
b7dbcc19
KH
1789 if (!(negate ^ (i < n_char_ranges)))
1790 break;
9af7511a 1791 }
327719ee 1792 fwd_ok:
8f924df7 1793 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1794 }
1795 else
8f924df7
KH
1796 while (1)
1797 {
1798 if (p >= stop)
9af7511a 1799 {
8f924df7 1800 if (p >= endp)
9af7511a 1801 break;
8f924df7
KH
1802 p = GAP_END_ADDR;
1803 stop = endp;
9af7511a 1804 }
327719ee
MB
1805
1806 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1807 {
1808 if (negate)
1809 break;
1810 else
1811 goto fwd_unibyte_ok;
1812 }
1813
8f924df7
KH
1814 if (!fastmap[*p])
1815 break;
327719ee 1816 fwd_unibyte_ok:
8f924df7
KH
1817 p++, pos++, pos_byte++;
1818 }
195d1361
RS
1819 }
1820 else
1821 {
b7dbcc19 1822 if (multibyte)
8f924df7 1823 while (1)
b7dbcc19 1824 {
8f924df7 1825 unsigned char *prev_p;
9af7511a 1826
8f924df7 1827 if (p <= stop)
9af7511a 1828 {
8f924df7 1829 if (p <= endp)
9af7511a 1830 break;
8f924df7
KH
1831 p = GPT_ADDR;
1832 stop = endp;
9af7511a 1833 }
8f924df7
KH
1834 prev_p = p;
1835 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 1836 c = STRING_CHAR (p);
a1bc88d4 1837
327719ee
MB
1838 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1839 {
1840 if (negate)
1841 break;
9690d026 1842 else
327719ee 1843 goto back_ok;
7e68b0ea 1844 }
a1bc88d4 1845
8f924df7 1846 if (! fastmap[*p])
b7dbcc19
KH
1847 break;
1848 if (! ASCII_CHAR_P (c))
7e68b0ea 1849 {
b7dbcc19
KH
1850 /* See the comment in the previous similar code. */
1851 for (i = 0; i < n_char_ranges; i += 2)
1852 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1853 break;
1854 if (!(negate ^ (i < n_char_ranges)))
1855 break;
7e68b0ea 1856 }
327719ee 1857 back_ok:
8f924df7 1858 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1859 }
1860 else
8f924df7
KH
1861 while (1)
1862 {
1863 if (p <= stop)
9af7511a 1864 {
8f924df7 1865 if (p <= endp)
9af7511a 1866 break;
8f924df7
KH
1867 p = GPT_ADDR;
1868 stop = endp;
9af7511a 1869 }
e39091b6 1870
327719ee
MB
1871 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1872 {
1873 if (negate)
1874 break;
9af7511a 1875 else
327719ee
MB
1876 goto back_unibyte_ok;
1877 }
a1bc88d4 1878
8f924df7
KH
1879 if (!fastmap[p[-1]])
1880 break;
327719ee 1881 back_unibyte_ok:
8f924df7
KH
1882 p--, pos--, pos_byte--;
1883 }
195d1361 1884 }
7e68b0ea 1885
b7dbcc19
KH
1886 SET_PT_BOTH (pos, pos_byte);
1887 immediate_quit = 0;
1888
1889 return make_number (PT - start_point);
1890 }
1891}
7e68b0ea 1892
b7dbcc19
KH
1893
1894static Lisp_Object
1895skip_syntaxes (forwardp, string, lim)
1896 int forwardp;
1897 Lisp_Object string, lim;
1898{
1899 register unsigned int c;
1900 unsigned char fastmap[0400];
1901 int negate = 0;
1902 register int i, i_byte;
1903 int multibyte;
1904 int size_byte;
1905 unsigned char *str;
1906
1907 CHECK_STRING (string);
1908
1909 if (NILP (lim))
1910 XSETINT (lim, forwardp ? ZV : BEGV);
1911 else
1912 CHECK_NUMBER_COERCE_MARKER (lim);
1913
1914 /* In any case, don't allow scan outside bounds of buffer. */
1915 if (XINT (lim) > ZV)
1916 XSETFASTINT (lim, ZV);
1917 if (XINT (lim) < BEGV)
1918 XSETFASTINT (lim, BEGV);
1919
ade8ee9e 1920 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 1921 return make_number (0);
8c6e735b 1922
b7dbcc19 1923 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
92eaa22e 1924 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19
KH
1925
1926 bzero (fastmap, sizeof fastmap);
1927
8f924df7
KH
1928 if (SBYTES (string) > SCHARS (string))
1929 /* As this is very rare case (syntax spec is ASCII only), don't
1930 consider efficiency. */
bc796a59
KH
1931 string = string_make_unibyte (string);
1932
8f924df7
KH
1933 str = SDATA (string);
1934 size_byte = SBYTES (string);
bc796a59 1935
b7dbcc19
KH
1936 i_byte = 0;
1937 if (i_byte < size_byte
8f924df7 1938 && SREF (string, 0) == '^')
b7dbcc19
KH
1939 {
1940 negate = 1; i_byte++;
1941 }
1942
b7dbcc19
KH
1943 /* Find the syntaxes specified and set their elements of fastmap. */
1944
1945 while (i_byte < size_byte)
1946 {
1947 c = str[i_byte++];
8f924df7 1948 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 1949 }
195d1361 1950
9690d026 1951 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1952 if (negate)
1953 for (i = 0; i < sizeof fastmap; i++)
9690d026 1954 fastmap[i] ^= 1;
195d1361
RS
1955
1956 {
1957 int start_point = PT;
1958 int pos = PT;
6a140a74 1959 int pos_byte = PT_BYTE;
8f924df7
KH
1960 unsigned char *p = PT_ADDR, *endp, *stop;
1961
1962 if (forwardp)
1963 {
1964 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1965 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1966 }
1967 else
1968 {
1969 endp = CHAR_POS_ADDR (XINT (lim));
1970 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1971 }
195d1361
RS
1972
1973 immediate_quit = 1;
b7dbcc19
KH
1974 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1975 if (forwardp)
195d1361 1976 {
b7dbcc19 1977 if (multibyte)
195d1361 1978 {
8f924df7 1979 while (1)
8c6e735b 1980 {
8f924df7
KH
1981 int nbytes;
1982
1983 if (p >= stop)
1984 {
1985 if (p >= endp)
1986 break;
1987 p = GAP_END_ADDR;
1988 stop = endp;
1989 }
62a6e103 1990 c = STRING_CHAR_AND_LENGTH (p, nbytes);
8f924df7 1991 if (! fastmap[(int) SYNTAX (c)])
8c6e735b 1992 break;
8f924df7 1993 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
1994 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1995 }
195d1361
RS
1996 }
1997 else
1998 {
8c6e735b 1999 while (1)
7e68b0ea 2000 {
8f924df7
KH
2001 if (p >= stop)
2002 {
2003 if (p >= endp)
2004 break;
2005 p = GAP_END_ADDR;
2006 stop = endp;
2007 }
2008 if (! fastmap[(int) SYNTAX (*p)])
8c6e735b 2009 break;
8f924df7 2010 p++, pos++, pos_byte++;
b7dbcc19 2011 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
2012 }
2013 }
2014 }
2015 else
2016 {
b7dbcc19 2017 if (multibyte)
195d1361 2018 {
8c6e735b 2019 while (1)
b7dbcc19 2020 {
8f924df7
KH
2021 unsigned char *prev_p;
2022
2023 if (p <= stop)
b7dbcc19 2024 {
8f924df7
KH
2025 if (p <= endp)
2026 break;
2027 p = GPT_ADDR;
2028 stop = endp;
b7dbcc19 2029 }
262be72a 2030 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2031 prev_p = p;
2032 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 2033 c = STRING_CHAR (p);
8f924df7
KH
2034 if (! fastmap[(int) SYNTAX (c)])
2035 break;
2036 pos--, pos_byte -= prev_p - p;
b7dbcc19 2037 }
195d1361
RS
2038 }
2039 else
2040 {
8c6e735b
KH
2041 while (1)
2042 {
8f924df7
KH
2043 if (p <= stop)
2044 {
2045 if (p <= endp)
2046 break;
2047 p = GPT_ADDR;
2048 stop = endp;
2049 }
102f6132 2050 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7 2051 if (! fastmap[(int) SYNTAX (p[-1])])
8c6e735b 2052 break;
8f924df7 2053 p--, pos--, pos_byte--;
8c6e735b 2054 }
195d1361
RS
2055 }
2056 }
6a140a74
RS
2057
2058 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2059 immediate_quit = 0;
2060
2061 return make_number (PT - start_point);
2062 }
2063}
a1bc88d4
RS
2064
2065/* Return 1 if character C belongs to one of the ISO classes
2066 in the list ISO_CLASSES. Each class is represented by an
2067 integer which is its type according to re_wctype. */
2068
2069static int
2070in_classes (c, iso_classes)
2071 int c;
2072 Lisp_Object iso_classes;
2073{
2074 int fits_class = 0;
2075
618db430 2076 while (CONSP (iso_classes))
a1bc88d4
RS
2077 {
2078 Lisp_Object elt;
2079 elt = XCAR (iso_classes);
2080 iso_classes = XCDR (iso_classes);
2081
2082 if (re_iswctype (c, XFASTINT (elt)))
2083 fits_class = 1;
2084 }
2085
2086 return fits_class;
2087}
195d1361 2088\f
95ff8dfc
RS
2089/* Jump over a comment, assuming we are at the beginning of one.
2090 FROM is the current position.
2091 FROM_BYTE is the bytepos corresponding to FROM.
2092 Do not move past STOP (a charpos).
2093 The comment over which we have to jump is of style STYLE
2094 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2095 NESTING should be positive to indicate the nesting at the beginning
2096 for nested comments and should be zero or negative else.
2097 ST_COMMENT_STYLE cannot be nested.
2098 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2099 (or 0 If the search cannot start in the middle of a two-character).
2100
2101 If successful, return 1 and store the charpos of the comment's end
2102 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2103 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2104 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2105 (as defined for state.incomment) in *INCOMMENT_PTR.
2106
2107 The comment end is the last character of the comment rather than the
2108 character just after the comment.
2109
2110 Global syntax data is assumed to initially be valid for FROM and
2111 remains valid for forward search starting at the returned position. */
2112
2113static int
2114forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
2115 charpos_ptr, bytepos_ptr, incomment_ptr)
a0aa7fcf 2116 EMACS_INT from, from_byte, stop;
95ff8dfc 2117 int nesting, style, prev_syntax;
a0aa7fcf
SM
2118 EMACS_INT *charpos_ptr, *bytepos_ptr;
2119 int *incomment_ptr;
95ff8dfc
RS
2120{
2121 register int c, c1;
2122 register enum syntaxcode code;
2123 register int syntax;
2124
2125 if (nesting <= 0) nesting = -1;
2126
2127 /* Enter the loop in the middle so that we find
2128 a 2-char comment ender if we start in the middle of it. */
2129 syntax = prev_syntax;
2130 if (syntax != 0) goto forw_incomment;
2131
2132 while (1)
2133 {
2134 if (from == stop)
2135 {
2136 *incomment_ptr = nesting;
2137 *charpos_ptr = from;
2138 *bytepos_ptr = from_byte;
2139 return 0;
2140 }
8f924df7 2141 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2142 syntax = SYNTAX_WITH_FLAGS (c);
2143 code = syntax & 0xff;
2144 if (code == Sendcomment
2145 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
2146 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2147 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2148 /* we have encountered a comment end of the same style
2149 as the comment sequence which began this comment
2150 section */
2151 break;
2152 if (code == Scomment_fence
2153 && style == ST_COMMENT_STYLE)
2154 /* we have encountered a comment end of the same style
2155 as the comment sequence which began this comment
2156 section. */
2157 break;
2158 if (nesting > 0
2159 && code == Scomment
abf8a9ff 2160 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
2161 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
2162 /* we have encountered a nested comment of the same style
2163 as the comment sequence which began this comment section */
2164 nesting++;
2165 INC_BOTH (from, from_byte);
2166 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2167
95ff8dfc
RS
2168 forw_incomment:
2169 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2170 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
8f924df7 2171 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
abf8a9ff
SM
2172 SYNTAX_COMEND_SECOND (c1))
2173 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2174 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
2175 {
2176 if (--nesting <= 0)
2177 /* we have encountered a comment end of the same style
2178 as the comment sequence which began this comment
2179 section */
2180 break;
2181 else
2182 {
2183 INC_BOTH (from, from_byte);
2184 UPDATE_SYNTAX_TABLE_FORWARD (from);
2185 }
2186 }
95ff8dfc
RS
2187 if (nesting > 0
2188 && from < stop
2189 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2190 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
95ff8dfc 2191 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
2192 && SYNTAX_COMSTART_SECOND (c1))
2193 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2194 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
2195 /* we have encountered a nested comment of the same style
2196 as the comment sequence which began this comment
2197 section */
2198 {
2199 INC_BOTH (from, from_byte);
2200 UPDATE_SYNTAX_TABLE_FORWARD (from);
2201 nesting++;
2202 }
2203 }
2204 *charpos_ptr = from;
2205 *bytepos_ptr = from_byte;
2206 return 1;
2207}
2208
b3cfe0c8 2209DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2210 doc: /*
ee648c11 2211Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2212Stop scanning if we find something other than a comment or whitespace.
2213Set point to where scanning stops.
ee648c11 2214If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
2215between them, return t; otherwise return nil. */)
2216 (count)
840f481c 2217 Lisp_Object count;
b3cfe0c8 2218{
2312c580
SM
2219 register EMACS_INT from;
2220 EMACS_INT from_byte;
2221 register EMACS_INT stop;
8ea151b2 2222 register int c, c1;
b3cfe0c8
RS
2223 register enum syntaxcode code;
2224 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2225 int comnested = 0; /* whether the comment is nestable or not */
be720845 2226 int found;
2312c580
SM
2227 EMACS_INT count1;
2228 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2229 int dummy;
840f481c 2230
b7826503 2231 CHECK_NUMBER (count);
840f481c 2232 count1 = XINT (count);
195d1361 2233 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2234
2235 immediate_quit = 1;
2236 QUIT;
2237
2238 from = PT;
6a140a74 2239 from_byte = PT_BYTE;
b3cfe0c8 2240
195d1361 2241 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2242 while (count1 > 0)
b3cfe0c8 2243 {
04882296 2244 do
b3cfe0c8 2245 {
f902a008
RS
2246 int comstart_first;
2247
04882296
KH
2248 if (from == stop)
2249 {
ef316cf0 2250 SET_PT_BOTH (from, from_byte);
b7e6e612 2251 immediate_quit = 0;
04882296
KH
2252 return Qnil;
2253 }
b7dbcc19 2254 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
b3cfe0c8 2255 code = SYNTAX (c);
f902a008 2256 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2257 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2258 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2259 INC_BOTH (from, from_byte);
f902a008 2260 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2261 if (from < stop && comstart_first
b7dbcc19 2262 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
8ea151b2 2263 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2264 {
7d0393cf 2265 /* We have encountered a comment start sequence and we
7fc8191e 2266 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2267 the comment style this sequence begins so that later,
2268 only a comment end of the same style actually ends
7fc8191e 2269 the comment section. */
b3cfe0c8 2270 code = Scomment;
8ea151b2 2271 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2272 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2273 INC_BOTH (from, from_byte);
95ff8dfc 2274 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2275 }
04882296 2276 }
abf8a9ff 2277 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2278
da71ab91
KH
2279 if (code == Scomment_fence)
2280 comstyle = ST_COMMENT_STYLE;
2281 else if (code != Scomment)
04882296
KH
2282 {
2283 immediate_quit = 0;
6a140a74 2284 DEC_BOTH (from, from_byte);
ef316cf0 2285 SET_PT_BOTH (from, from_byte);
04882296
KH
2286 return Qnil;
2287 }
2288 /* We're at the start of a comment. */
95ff8dfc
RS
2289 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2290 &out_charpos, &out_bytepos, &dummy);
2291 from = out_charpos; from_byte = out_bytepos;
2292 if (!found)
04882296 2293 {
95ff8dfc
RS
2294 immediate_quit = 0;
2295 SET_PT_BOTH (from, from_byte);
2296 return Qnil;
b3cfe0c8 2297 }
95ff8dfc
RS
2298 INC_BOTH (from, from_byte);
2299 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2300 /* We have skipped one comment. */
840f481c 2301 count1--;
b3cfe0c8
RS
2302 }
2303
840f481c 2304 while (count1 < 0)
b3cfe0c8 2305 {
b9145dbb 2306 while (1)
b3cfe0c8 2307 {
c65adb44 2308 int quoted;
f902a008 2309
b9145dbb
RS
2310 if (from <= stop)
2311 {
6a140a74 2312 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2313 immediate_quit = 0;
2314 return Qnil;
2315 }
b3cfe0c8 2316
6a140a74 2317 DEC_BOTH (from, from_byte);
f902a008 2318 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2319 quoted = char_quoted (from, from_byte);
b7dbcc19 2320 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
b3cfe0c8
RS
2321 code = SYNTAX (c);
2322 comstyle = 0;
95ff8dfc 2323 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2324 if (code == Sendcomment)
2325 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2326 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2327 && prev_char_comend_first (from, from_byte)
89c6809a 2328 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2329 {
7fc8191e 2330 /* We must record the comment style encountered so that
b3cfe0c8 2331 later, we can match only the proper comment begin
7fc8191e 2332 sequence of the same style. */
6a140a74 2333 DEC_BOTH (from, from_byte);
f902a008
RS
2334 code = Sendcomment;
2335 /* Calling char_quoted, above, set up global syntax position
2336 at the new value of FROM. */
b7dbcc19 2337 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2338 comstyle = SYNTAX_COMMENT_STYLE (c1);
2339 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2340 }
2341
195d1361
RS
2342 if (code == Scomment_fence)
2343 {
2344 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2345 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2346
6a140a74 2347 while (1)
195d1361 2348 {
6a140a74 2349 DEC_BOTH (from, from_byte);
195d1361 2350 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2351 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2352 if (SYNTAX (c) == Scomment_fence
7d0393cf 2353 && !char_quoted (from, from_byte))
195d1361 2354 {
7d0393cf 2355 found = 1;
195d1361
RS
2356 break;
2357 }
fcdd4585
SM
2358 else if (from == stop)
2359 break;
195d1361
RS
2360 }
2361 if (found == 0)
2362 {
2363 from = ini; /* Set point to ini + 1. */
6a140a74 2364 from_byte = ini_byte;
195d1361
RS
2365 goto leave;
2366 }
39c41ad4
SM
2367 else
2368 /* We have skipped one comment. */
2369 break;
195d1361
RS
2370 }
2371 else if (code == Sendcomment)
b3cfe0c8 2372 {
95ff8dfc 2373 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2374 &out_charpos, &out_bytepos);
1aa963c8
SM
2375 if (found == -1)
2376 {
abf8a9ff
SM
2377 if (c == '\n')
2378 /* This end-of-line is not an end-of-comment.
2379 Treat it like a whitespace.
2380 CC-mode (and maybe others) relies on this behavior. */
2381 ;
2382 else
2383 {
2384 /* Failure: we should go back to the end of this
2385 not-quite-endcomment. */
2386 if (SYNTAX(c) != code)
2387 /* It was a two-char Sendcomment. */
2388 INC_BOTH (from, from_byte);
2389 goto leave;
2390 }
1aa963c8 2391 }
2b927d02 2392 else
abf8a9ff
SM
2393 {
2394 /* We have skipped one comment. */
2395 from = out_charpos, from_byte = out_bytepos;
2396 break;
2397 }
b3cfe0c8 2398 }
bb0de084 2399 else if (code != Swhitespace || quoted)
b3cfe0c8 2400 {
195d1361 2401 leave:
b3cfe0c8 2402 immediate_quit = 0;
6a140a74 2403 INC_BOTH (from, from_byte);
ef316cf0 2404 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2405 return Qnil;
2406 }
2407 }
2408
840f481c 2409 count1++;
b3cfe0c8
RS
2410 }
2411
ef316cf0 2412 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2413 immediate_quit = 0;
2414 return Qt;
2415}
2416\f
b7dbcc19 2417/* Return syntax code of character C if C is an ASCII character
db9f48c3 2418 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08 2419
b7dbcc19
KH
2420#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2421 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
bd25db08
KH
2422 ? SYNTAX (c) : Ssymbol)
2423
6a140a74 2424static Lisp_Object
8489eb67 2425scan_lists (from, count, depth, sexpflag)
932e6895
SM
2426 register EMACS_INT from;
2427 EMACS_INT count, depth;
2428 int sexpflag;
8489eb67
RS
2429{
2430 Lisp_Object val;
932e6895 2431 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2432 register int c, c1;
2433 int stringterm;
8489eb67
RS
2434 int quoted;
2435 int mathexit = 0;
93da5fff 2436 register enum syntaxcode code, temp_code;
195d1361 2437 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2438 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2439 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2440 EMACS_INT temp_pos;
2441 EMACS_INT last_good = from;
195d1361 2442 int found;
932e6895
SM
2443 EMACS_INT from_byte;
2444 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2445 int temp, dummy;
bd25db08 2446 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2447
2448 if (depth > 0) min_depth = 0;
2449
c3907a7e
RS
2450 if (from > ZV) from = ZV;
2451 if (from < BEGV) from = BEGV;
2452
2453 from_byte = CHAR_TO_BYTE (from);
2454
8489eb67
RS
2455 immediate_quit = 1;
2456 QUIT;
2457
195d1361 2458 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2459 while (count > 0)
2460 {
8489eb67
RS
2461 while (from < stop)
2462 {
f902a008 2463 int comstart_first, prefix;
195d1361 2464 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2465 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2466 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2467 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2468 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2469 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2470 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2471 if (depth == min_depth)
2472 last_good = from;
6a140a74 2473 INC_BOTH (from, from_byte);
195d1361 2474 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2475 if (from < stop && comstart_first
327719ee
MB
2476 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2477 SYNTAX_COMSTART_SECOND (c))
8489eb67 2478 && parse_sexp_ignore_comments)
e5d4f4dc 2479 {
7d0393cf 2480 /* we have encountered a comment start sequence and we
195d1361 2481 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2482 the comment style this sequence begins so that later,
2483 only a comment end of the same style actually ends
2484 the comment section */
2485 code = Scomment;
b7dbcc19 2486 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2487 comstyle = SYNTAX_COMMENT_STYLE (c1);
2488 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2489 INC_BOTH (from, from_byte);
f902a008 2490 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2491 }
7d0393cf 2492
f902a008 2493 if (prefix)
8489eb67
RS
2494 continue;
2495
0220c518 2496 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2497 {
2498 case Sescape:
2499 case Scharquote:
8b8bf8e6
SM
2500 if (from == stop)
2501 goto lose;
6a140a74 2502 INC_BOTH (from, from_byte);
8489eb67
RS
2503 /* treat following character as a word constituent */
2504 case Sword:
2505 case Ssymbol:
2506 if (depth || !sexpflag) break;
195d1361 2507 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2508 while (from < stop)
2509 {
195d1361 2510 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2511
2512 /* Some compilers can't handle this inside the switch. */
b7dbcc19 2513 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2514 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2515 switch (temp)
8489eb67
RS
2516 {
2517 case Scharquote:
2518 case Sescape:
6a140a74 2519 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2520 if (from == stop)
2521 goto lose;
8489eb67
RS
2522 break;
2523 case Sword:
2524 case Ssymbol:
2525 case Squote:
2526 break;
2527 default:
2528 goto done;
2529 }
6a140a74 2530 INC_BOTH (from, from_byte);
8489eb67
RS
2531 }
2532 goto done;
2533
195d1361 2534 case Scomment_fence:
95ff8dfc
RS
2535 comstyle = ST_COMMENT_STYLE;
2536 /* FALLTHROUGH */
2537 case Scomment:
8489eb67 2538 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2539 UPDATE_SYNTAX_TABLE_FORWARD (from);
2540 found = forw_comment (from, from_byte, stop,
2541 comnested, comstyle, 0,
2542 &out_charpos, &out_bytepos, &dummy);
2543 from = out_charpos, from_byte = out_bytepos;
2544 if (!found)
8489eb67 2545 {
95ff8dfc
RS
2546 if (depth == 0)
2547 goto done;
2548 goto lose;
8489eb67 2549 }
95ff8dfc
RS
2550 INC_BOTH (from, from_byte);
2551 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2552 break;
2553
2554 case Smath:
2555 if (!sexpflag)
2556 break;
b7dbcc19 2557 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2558 {
2559 INC_BOTH (from, from_byte);
2560 }
8489eb67
RS
2561 if (mathexit)
2562 {
2563 mathexit = 0;
2564 goto close1;
2565 }
2566 mathexit = 1;
2567
2568 case Sopen:
2569 if (!++depth) goto done;
2570 break;
2571
2572 case Sclose:
2573 close1:
2574 if (!--depth) goto done;
2575 if (depth < min_depth)
9471945b
KS
2576 xsignal3 (Qscan_error,
2577 build_string ("Containing expression ends prematurely"),
2578 make_number (last_good), make_number (from));
8489eb67
RS
2579 break;
2580
2581 case Sstring:
195d1361 2582 case Sstring_fence:
ef316cf0 2583 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2584 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2585 while (1)
2586 {
8b8bf8e6
SM
2587 if (from >= stop)
2588 goto lose;
195d1361 2589 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2590 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
42ebfa31
SM
2591 if (code == Sstring
2592 ? (c == stringterm
2593 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2594 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2595 break;
7c7ca3d2
RS
2596
2597 /* Some compilers can't handle this inside the switch. */
bd25db08 2598 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2599 switch (temp)
8489eb67
RS
2600 {
2601 case Scharquote:
2602 case Sescape:
6a140a74 2603 INC_BOTH (from, from_byte);
8489eb67 2604 }
6a140a74 2605 INC_BOTH (from, from_byte);
8489eb67 2606 }
6a140a74 2607 INC_BOTH (from, from_byte);
8489eb67
RS
2608 if (!depth && sexpflag) goto done;
2609 break;
240c43e8
SM
2610 default:
2611 /* Ignore whitespace, punctuation, quote, endcomment. */
2612 break;
8489eb67
RS
2613 }
2614 }
2615
2616 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2617 if (depth)
2618 goto lose;
8489eb67
RS
2619
2620 immediate_quit = 0;
2621 return Qnil;
2622
2623 /* End of object reached */
2624 done:
2625 count--;
2626 }
2627
2628 while (count < 0)
2629 {
8489eb67
RS
2630 while (from > stop)
2631 {
6a140a74 2632 DEC_BOTH (from, from_byte);
195d1361 2633 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2634 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2635 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2636 if (depth == min_depth)
2637 last_good = from;
7fc8191e 2638 comstyle = 0;
95ff8dfc 2639 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2640 if (code == Sendcomment)
2641 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2642 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2643 && prev_char_comend_first (from, from_byte)
8489eb67 2644 && parse_sexp_ignore_comments)
e5d4f4dc 2645 {
f902a008 2646 /* We must record the comment style encountered so that
e5d4f4dc 2647 later, we can match only the proper comment begin
f902a008 2648 sequence of the same style. */
6a140a74 2649 DEC_BOTH (from, from_byte);
f902a008
RS
2650 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2651 code = Sendcomment;
b7dbcc19 2652 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2653 comstyle = SYNTAX_COMMENT_STYLE (c1);
2654 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2655 }
7d0393cf 2656
9828a477 2657 /* Quoting turns anything except a comment-ender
e6365855 2658 into a word character. Note that this cannot be true
f902a008 2659 if we decremented FROM in the if-statement above. */
9828a477 2660 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2661 {
2662 DEC_BOTH (from, from_byte);
2663 code = Sword;
2664 }
9828a477 2665 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2666 continue;
2667
9828a477 2668 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2669 {
2670 case Sword:
2671 case Ssymbol:
9828a477
KH
2672 case Sescape:
2673 case Scharquote:
8489eb67 2674 if (depth || !sexpflag) break;
195d1361
RS
2675 /* This word counts as a sexp; count object finished
2676 after passing it. */
8489eb67
RS
2677 while (from > stop)
2678 {
6a140a74 2679 temp_pos = from_byte;
ef316cf0
RS
2680 if (! NILP (current_buffer->enable_multibyte_characters))
2681 DEC_POS (temp_pos);
2682 else
2683 temp_pos--;
6a140a74 2684 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2685 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2686 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2687 /* Don't allow comment-end to be quoted. */
2688 if (temp_code == Sendcomment)
2689 goto done2;
6a140a74 2690 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2691 if (quoted)
93da5fff 2692 {
6a140a74 2693 DEC_BOTH (from, from_byte);
ef316cf0 2694 temp_pos = dec_bytepos (temp_pos);
6a140a74 2695 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2696 }
b7dbcc19 2697 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2698 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2699 if (! (quoted || temp_code == Sword
2700 || temp_code == Ssymbol
2701 || temp_code == Squote))
8489eb67 2702 goto done2;
6a140a74 2703 DEC_BOTH (from, from_byte);
8489eb67
RS
2704 }
2705 goto done2;
2706
2707 case Smath:
2708 if (!sexpflag)
2709 break;
ef316cf0 2710 temp_pos = dec_bytepos (from_byte);
6a140a74 2711 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2712 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2713 DEC_BOTH (from, from_byte);
8489eb67
RS
2714 if (mathexit)
2715 {
2716 mathexit = 0;
2717 goto open2;
2718 }
2719 mathexit = 1;
2720
2721 case Sclose:
2722 if (!++depth) goto done2;
2723 break;
2724
2725 case Sopen:
2726 open2:
2727 if (!--depth) goto done2;
2728 if (depth < min_depth)
9471945b
KS
2729 xsignal3 (Qscan_error,
2730 build_string ("Containing expression ends prematurely"),
2731 make_number (last_good), make_number (from));
8489eb67
RS
2732 break;
2733
2734 case Sendcomment:
2735 if (!parse_sexp_ignore_comments)
2736 break;
95ff8dfc 2737 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2738 &out_charpos, &out_bytepos);
1aa963c8
SM
2739 /* FIXME: if found == -1, then it really wasn't a comment-end.
2740 For single-char Sendcomment, we can't do much about it apart
2741 from skipping the char.
2742 For 2-char endcomments, we could try again, taking both
2743 chars as separate entities, but it's a lot of trouble
2744 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2745 if (found != -1)
2746 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2747 break;
2748
195d1361
RS
2749 case Scomment_fence:
2750 case Sstring_fence:
2751 while (1)
2752 {
8b8bf8e6
SM
2753 if (from == stop)
2754 goto lose;
6b61353c 2755 DEC_BOTH (from, from_byte);
195d1361 2756 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2757 if (!char_quoted (from, from_byte)
b7dbcc19 2758 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
bd25db08 2759 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2760 break;
2761 }
2762 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2763 break;
7d0393cf 2764
8489eb67 2765 case Sstring:
b7dbcc19 2766 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2767 while (1)
2768 {
8b8bf8e6
SM
2769 if (from == stop)
2770 goto lose;
6b61353c
KH
2771 DEC_BOTH (from, from_byte);
2772 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2773 if (!char_quoted (from, from_byte)
2774 && (stringterm
2775 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
42ebfa31 2776 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2777 break;
8489eb67 2778 }
8489eb67
RS
2779 if (!depth && sexpflag) goto done2;
2780 break;
240c43e8
SM
2781 default:
2782 /* Ignore whitespace, punctuation, quote, endcomment. */
2783 break;
8489eb67
RS
2784 }
2785 }
2786
2787 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2788 if (depth)
2789 goto lose;
8489eb67
RS
2790
2791 immediate_quit = 0;
2792 return Qnil;
2793
2794 done2:
2795 count++;
2796 }
2797
2798
2799 immediate_quit = 0;
1e142fb7 2800 XSETFASTINT (val, from);
8489eb67
RS
2801 return val;
2802
2803 lose:
9471945b
KS
2804 xsignal3 (Qscan_error,
2805 build_string ("Unbalanced parentheses"),
2806 make_number (last_good), make_number (from));
8489eb67
RS
2807}
2808
8489eb67 2809DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2810 doc: /* Scan from character number FROM by COUNT lists.
2811Returns the character number of the position thus found.
2812
2813If DEPTH is nonzero, paren depth begins counting from that value,
2814only places where the depth in parentheses becomes zero
2815are candidates for stopping; COUNT such places are counted.
2816Thus, a positive value for DEPTH means go out levels.
2817
2818Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2819
2820If the beginning or end of (the accessible part of) the buffer is reached
2821and the depth is wrong, an error is signaled.
2822If the depth is right but the count is not used up, nil is returned. */)
2823 (from, count, depth)
8489eb67
RS
2824 Lisp_Object from, count, depth;
2825{
b7826503
PJ
2826 CHECK_NUMBER (from);
2827 CHECK_NUMBER (count);
2828 CHECK_NUMBER (depth);
8489eb67
RS
2829
2830 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2831}
2832
2833DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2834 doc: /* Scan from character number FROM by COUNT balanced expressions.
2835If COUNT is negative, scan backwards.
2836Returns the character number of the position thus found.
2837
2838Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2839
2840If the beginning or end of (the accessible part of) the buffer is reached
2841in the middle of a parenthetical grouping, an error is signaled.
2842If the beginning or end is reached between groupings
2843but before count is used up, nil is returned. */)
2844 (from, count)
8489eb67
RS
2845 Lisp_Object from, count;
2846{
b7826503
PJ
2847 CHECK_NUMBER (from);
2848 CHECK_NUMBER (count);
8489eb67
RS
2849
2850 return scan_lists (XINT (from), XINT (count), 0, 1);
2851}
2852
2853DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2854 0, 0, 0,
2855 doc: /* Move point backward over any number of chars with prefix syntax.
2856This includes chars with "quote" or "prefix" syntax (' or p). */)
2857 ()
8489eb67
RS
2858{
2859 int beg = BEGV;
6a140a74
RS
2860 int opoint = PT;
2861 int opoint_byte = PT_BYTE;
6ec8bbd2 2862 int pos = PT;
6a140a74 2863 int pos_byte = PT_BYTE;
93da5fff 2864 int c;
93da5fff 2865
7d0393cf 2866 if (pos <= beg)
195d1361 2867 {
f902a008
RS
2868 SET_PT_BOTH (opoint, opoint_byte);
2869
2870 return Qnil;
195d1361 2871 }
8489eb67 2872
f902a008
RS
2873 SETUP_SYNTAX_TABLE (pos, -1);
2874
6a140a74
RS
2875 DEC_BOTH (pos, pos_byte);
2876
1fd3172d 2877 while (!char_quoted (pos, pos_byte)
195d1361 2878 /* Previous statement updates syntax table. */
b7dbcc19 2879 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2880 || SYNTAX_PREFIX (c)))
2881 {
1fd3172d
RS
2882 opoint = pos;
2883 opoint_byte = pos_byte;
2884
2885 if (pos + 1 > beg)
2886 DEC_BOTH (pos, pos_byte);
93da5fff 2887 }
8489eb67 2888
6a140a74 2889 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2890
2891 return Qnil;
2892}
2893\f
6a140a74 2894/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2895 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2896 and return a description of the state of the parse at END.
c81a3712 2897 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2898 If COMMENTSTOP is 1, stop at the start of a comment.
2899 If COMMENTSTOP is -1, stop at the start or end of a comment,
2900 after the beginning of a string, or after the end of a string. */
8489eb67 2901
340f92b5 2902static void
6a140a74 2903scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2904 stopbefore, oldstate, commentstop)
e5d4f4dc 2905 struct lisp_parse_state *stateptr;
8e2911c2
AS
2906 register EMACS_INT from;
2907 EMACS_INT from_byte, end;
2908 int targetdepth, stopbefore;
8489eb67 2909 Lisp_Object oldstate;
c81a3712 2910 int commentstop;
8489eb67
RS
2911{
2912 struct lisp_parse_state state;
2913
2914 register enum syntaxcode code;
95ff8dfc
RS
2915 int c1;
2916 int comnested;
8489eb67
RS
2917 struct level { int last, prev; };
2918 struct level levelstart[100];
2919 register struct level *curlevel = levelstart;
2920 struct level *endlevel = levelstart + 100;
8489eb67
RS
2921 register int depth; /* Paren depth of current scanning location.
2922 level - levelstart equals this except
2923 when the depth becomes negative. */
2924 int mindepth; /* Lowest DEPTH value seen. */
2925 int start_quoted = 0; /* Nonzero means starting after a char quote */
2926 Lisp_Object tem;
8e2911c2
AS
2927 EMACS_INT prev_from; /* Keep one character before FROM. */
2928 EMACS_INT prev_from_byte;
1fd3172d 2929 int prev_from_syntax;
195d1361
RS
2930 int boundary_stop = commentstop == -1;
2931 int nofence;
95ff8dfc 2932 int found;
60c86a83 2933 EMACS_INT out_bytepos, out_charpos;
7c7ca3d2 2934 int temp;
93da5fff
KH
2935
2936 prev_from = from;
6a140a74
RS
2937 prev_from_byte = from_byte;
2938 if (from != BEGV)
2939 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2940
2941 /* Use this macro instead of `from++'. */
6a140a74
RS
2942#define INC_FROM \
2943do { prev_from = from; \
2944 prev_from_byte = from_byte; \
327719ee 2945 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 2946 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2947 INC_BOTH (from, from_byte); \
3ceb4629
SM
2948 if (from < end) \
2949 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2950 } while (0)
8489eb67
RS
2951
2952 immediate_quit = 1;
2953 QUIT;
2954
265a9e55 2955 if (NILP (oldstate))
8489eb67
RS
2956 {
2957 depth = 0;
2958 state.instring = -1;
2959 state.incomment = 0;
195d1361
RS
2960 state.comstyle = 0; /* comment style a by default. */
2961 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2962 }
2963 else
2964 {
2965 tem = Fcar (oldstate);
265a9e55 2966 if (!NILP (tem))
8489eb67
RS
2967 depth = XINT (tem);
2968 else
2969 depth = 0;
2970
2971 oldstate = Fcdr (oldstate);
2972 oldstate = Fcdr (oldstate);
2973 oldstate = Fcdr (oldstate);
2974 tem = Fcar (oldstate);
195d1361 2975 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2976 state.instring = (!NILP (tem)
2977 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2978 : -1);
8489eb67
RS
2979
2980 oldstate = Fcdr (oldstate);
2981 tem = Fcar (oldstate);
e76f1c44
GM
2982 state.incomment = (!NILP (tem)
2983 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2984 : 0);
8489eb67
RS
2985
2986 oldstate = Fcdr (oldstate);
2987 tem = Fcar (oldstate);
265a9e55 2988 start_quoted = !NILP (tem);
e5d4f4dc 2989
95ff8dfc 2990 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2991 style a. If it is non-nil, we are in comment style b */
2992 oldstate = Fcdr (oldstate);
e5d4f4dc 2993 oldstate = Fcdr (oldstate);
195d1361 2994 tem = Fcar (oldstate);
7d0393cf 2995 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2996 ? ST_COMMENT_STYLE : 1);
195d1361 2997
e5d4f4dc 2998 oldstate = Fcdr (oldstate);
e5d4f4dc 2999 tem = Fcar (oldstate);
195d1361 3000 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
3001 oldstate = Fcdr (oldstate);
3002 tem = Fcar (oldstate);
3003 while (!NILP (tem)) /* >= second enclosing sexps. */
3004 {
3005 /* curlevel++->last ran into compiler bug on Apollo */
3006 curlevel->last = XINT (Fcar (tem));
3007 if (++curlevel == endlevel)
02010917 3008 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
3009 curlevel->prev = -1;
3010 curlevel->last = -1;
3011 tem = Fcdr (tem);
3012 }
8489eb67
RS
3013 }
3014 state.quoted = 0;
3015 mindepth = depth;
3016
3017 curlevel->prev = -1;
3018 curlevel->last = -1;
3019
326b283b 3020 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
3021 temp = FETCH_CHAR (prev_from_byte);
3022 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 3023 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 3024
195d1361 3025 /* Enter the loop at a place appropriate for initial state. */
8489eb67 3026
326b283b
RS
3027 if (state.incomment)
3028 goto startincomment;
8489eb67
RS
3029 if (state.instring >= 0)
3030 {
195d1361 3031 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
3032 if (start_quoted)
3033 goto startquotedinstring;
8489eb67
RS
3034 goto startinstring;
3035 }
326b283b
RS
3036 else if (start_quoted)
3037 goto startquoted;
1fd3172d 3038
8489eb67
RS
3039 while (from < end)
3040 {
93da5fff 3041 INC_FROM;
1fd3172d 3042 code = prev_from_syntax & 0xff;
4c920633 3043
c5d4b96f
SM
3044 if (from < end
3045 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3046 && (c1 = FETCH_CHAR (from_byte),
3047 SYNTAX_COMSTART_SECOND (c1)))
3048 /* Duplicate code to avoid a complex if-expression
3049 which causes trouble for the SGI compiler. */
95ff8dfc 3050 {
c5d4b96f
SM
3051 /* Record the comment style we have entered so that only
3052 the comment-end sequence of the same style actually
3053 terminates the comment section. */
3054 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
3055 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3056 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
3057 state.incomment = comnested ? 1 : -1;
95ff8dfc 3058 state.comstr_start = prev_from;
c5d4b96f
SM
3059 INC_FROM;
3060 code = Scomment;
95ff8dfc 3061 }
4c920633 3062 else if (code == Scomment_fence)
e5d4f4dc
RS
3063 {
3064 /* Record the comment style we have entered so that only
3065 the comment-end sequence of the same style actually
3066 terminates the comment section. */
95ff8dfc
RS
3067 state.comstyle = ST_COMMENT_STYLE;
3068 state.incomment = -1;
195d1361 3069 state.comstr_start = prev_from;
e5d4f4dc 3070 code = Scomment;
e5d4f4dc 3071 }
c5d4b96f
SM
3072 else if (code == Scomment)
3073 {
3074 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
3075 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3076 1 : -1);
3077 state.comstr_start = prev_from;
3078 }
e5d4f4dc 3079
1fd3172d 3080 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3081 continue;
0220c518 3082 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
3083 {
3084 case Sescape:
3085 case Scharquote:
3086 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3087 curlevel->last = prev_from;
8489eb67
RS
3088 startquoted:
3089 if (from == end) goto endquoted;
93da5fff 3090 INC_FROM;
8489eb67
RS
3091 goto symstarted;
3092 /* treat following character as a word constituent */
3093 case Sword:
3094 case Ssymbol:
3095 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3096 curlevel->last = prev_from;
8489eb67
RS
3097 symstarted:
3098 while (from < end)
3099 {
7c7ca3d2 3100 /* Some compilers can't handle this inside the switch. */
327719ee 3101 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
ab229fdd 3102 temp = SYNTAX (temp);
7c7ca3d2 3103 switch (temp)
8489eb67
RS
3104 {
3105 case Scharquote:
3106 case Sescape:
93da5fff 3107 INC_FROM;
8489eb67
RS
3108 if (from == end) goto endquoted;
3109 break;
3110 case Sword:
3111 case Ssymbol:
3112 case Squote:
3113 break;
3114 default:
3115 goto symdone;
3116 }
93da5fff 3117 INC_FROM;
8489eb67
RS
3118 }
3119 symdone:
3120 curlevel->prev = curlevel->last;
3121 break;
3122
240c43e8 3123 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3124 case Scomment:
195d1361 3125 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3126 startincomment:
3127 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3128 that we find a 2-char comment ender even if we start in the
e6365855 3129 middle of it. We don't want to do that if we're just at the
d355bd8a 3130 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3131 found = forw_comment (from, from_byte, end,
3132 state.incomment, state.comstyle,
e6365855
SM
3133 (from == BEGV || from < state.comstr_start + 3)
3134 ? 0 : prev_from_syntax,
95ff8dfc
RS
3135 &out_charpos, &out_bytepos, &state.incomment);
3136 from = out_charpos; from_byte = out_bytepos;
3137 /* Beware! prev_from and friends are invalid now.
3138 Luckily, the `done' doesn't use them and the INC_FROM
3139 sets them to a sane value without looking at them. */
3140 if (!found) goto done;
d355bd8a 3141 INC_FROM;
8489eb67 3142 state.incomment = 0;
e5d4f4dc 3143 state.comstyle = 0; /* reset the comment style */
195d1361 3144 if (boundary_stop) goto done;
8489eb67
RS
3145 break;
3146
3147 case Sopen:
3148 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3149 depth++;
3150 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3151 curlevel->last = prev_from;
8489eb67 3152 if (++curlevel == endlevel)
02010917 3153 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3154 curlevel->prev = -1;
3155 curlevel->last = -1;
30844415 3156 if (targetdepth == depth) goto done;
8489eb67
RS
3157 break;
3158
3159 case Sclose:
3160 depth--;
3161 if (depth < mindepth)
3162 mindepth = depth;
3163 if (curlevel != levelstart)
3164 curlevel--;
3165 curlevel->prev = curlevel->last;
30844415 3166 if (targetdepth == depth) goto done;
8489eb67
RS
3167 break;
3168
3169 case Sstring:
195d1361
RS
3170 case Sstring_fence:
3171 state.comstr_start = from - 1;
8489eb67 3172 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3173 curlevel->last = prev_from;
7d0393cf 3174 state.instring = (code == Sstring
b7dbcc19 3175 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3176 : ST_STRING_STYLE);
3177 if (boundary_stop) goto done;
8489eb67 3178 startinstring:
195d1361 3179 {
644ea4df 3180 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3181
644ea4df
RS
3182 while (1)
3183 {
3184 int c;
195d1361 3185
644ea4df 3186 if (from >= end) goto done;
b7dbcc19 3187 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
7c7ca3d2
RS
3188 /* Some compilers can't handle this inside the switch. */
3189 temp = SYNTAX (c);
ccf89641
KH
3190
3191 /* Check TEMP here so that if the char has
3192 a syntax-table property which says it is NOT
3193 a string character, it does not end the string. */
3194 if (nofence && c == state.instring && temp == Sstring)
3195 break;
3196
7c7ca3d2 3197 switch (temp)
644ea4df
RS
3198 {
3199 case Sstring_fence:
3200 if (!nofence) goto string_end;
3201 break;
3202 case Scharquote:
3203 case Sescape:
3204 INC_FROM;
3205 startquotedinstring:
3206 if (from >= end) goto endquoted;
195d1361 3207 }
644ea4df
RS
3208 INC_FROM;
3209 }
195d1361
RS
3210 }
3211 string_end:
8489eb67
RS
3212 state.instring = -1;
3213 curlevel->prev = curlevel->last;
93da5fff 3214 INC_FROM;
195d1361 3215 if (boundary_stop) goto done;
8489eb67
RS
3216 break;
3217
3218 case Smath:
240c43e8
SM
3219 /* FIXME: We should do something with it. */
3220 break;
3221 default:
3222 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3223 break;
3224 }
3225 }
3226 goto done;
3227
3228 stop: /* Here if stopping before start of sexp. */
93da5fff 3229 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3230 goto done; /* but return the position before it. */
3231
3232 endquoted:
3233 state.quoted = 1;
3234 done:
3235 state.depth = depth;
3236 state.mindepth = mindepth;
3237 state.thislevelstart = curlevel->prev;
3238 state.prevlevelstart
3239 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3240 state.location = from;
1a102a58
RS
3241 state.levelstarts = Qnil;
3242 while (--curlevel >= levelstart)
3243 state.levelstarts = Fcons (make_number (curlevel->last),
3244 state.levelstarts);
8489eb67
RS
3245 immediate_quit = 0;
3246
e5d4f4dc 3247 *stateptr = state;
8489eb67
RS
3248}
3249
c81a3712 3250DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3251 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3252Parsing stops at TO or when certain criteria are met;
3253 point is set to where parsing stops.
3254If fifth arg OLDSTATE is omitted or nil,
3255 parsing assumes that FROM is the beginning of a function.
0e6228bc 3256Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3257 0. depth in parens.
3258 1. character address of start of innermost containing list; nil if none.
3259 2. character address of start of last complete sexp terminated.
3260 3. non-nil if inside a string.
3261 (it is the character that will terminate the string,
3262 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3263 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3264 else an integer (the current comment nesting).
3265 5. t if following a quote character.
3266 6. the minimum paren-depth encountered during this scan.
3267 7. t if in a comment of style b; symbol `syntax-table' if the comment
3268 should be terminated by a generic comment delimiter.
3269 8. character address of start of comment or string; nil if not in one.
3270 9. Intermediate data for continuation of parsing (subject to change).
3271If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3272in parentheses becomes equal to TARGETDEPTH.
3273Fourth arg STOPBEFORE non-nil means stop when come to
3274 any character that starts a sexp.
0e6228bc 3275Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3276 It is used to initialize the state of the parse. Elements number 1, 2, 6
43726c05 3277 and 8 are ignored.
fdb82f93
PJ
3278Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3279 If it is symbol `syntax-table', stop after the start of a comment or a
3280 string, or after end of a comment or a string. */)
3281 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 3282 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
3283{
3284 struct lisp_parse_state state;
3285 int target;
3286
265a9e55 3287 if (!NILP (targetdepth))
8489eb67 3288 {
b7826503 3289 CHECK_NUMBER (targetdepth);
8489eb67
RS
3290 target = XINT (targetdepth);
3291 }
3292 else
3293 target = -100000; /* We won't reach this depth */
3294
3295 validate_region (&from, &to);
6a140a74
RS
3296 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3297 XINT (to),
c81a3712 3298 target, !NILP (stopbefore), oldstate,
7d0393cf 3299 (NILP (commentstop)
195d1361 3300 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3301
3302 SET_PT (state.location);
7d0393cf 3303
8489eb67
RS
3304 return Fcons (make_number (state.depth),
3305 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3306 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3307 Fcons (state.instring >= 0
3308 ? (state.instring == ST_STRING_STYLE
195d1361 3309 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3310 Fcons (state.incomment < 0 ? Qt :
3311 (state.incomment == 0 ? Qnil :
3312 make_number (state.incomment)),
8489eb67 3313 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3314 Fcons (make_number (state.mindepth),
7d0393cf 3315 Fcons ((state.comstyle
bff37d2a
RS
3316 ? (state.comstyle == ST_COMMENT_STYLE
3317 ? Qsyntax_table : Qt) :
3318 Qnil),
0beaf54f
DL
3319 Fcons (((state.incomment
3320 || (state.instring >= 0))
bff37d2a
RS
3321 ? make_number (state.comstr_start)
3322 : Qnil),
1a102a58 3323 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3324}
3325\f
dfcf069d 3326void
8489eb67
RS
3327init_syntax_once ()
3328{
78f9a1f7 3329 register int i, c;
8ea151b2 3330 Lisp_Object temp;
8489eb67 3331
5ebaddf5 3332 /* This has to be done here, before we call Fmake_char_table. */
d67b4f80 3333 Qsyntax_table = intern_c_string ("syntax-table");
5ebaddf5
RS
3334 staticpro (&Qsyntax_table);
3335
d67b4f80 3336 /* Intern_C_String this now in case it isn't already done.
5ebaddf5
RS
3337 Setting this variable twice is harmless.
3338 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 3339 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
5ebaddf5 3340
93da5fff 3341 /* Create objects which can be shared among syntax tables. */
42ebfa31 3342 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3343 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3344 XVECTOR (Vsyntax_code_object)->contents[i]
3345 = Fcons (make_number (i), Qnil);
3346
5ebaddf5
RS
3347 /* Now we are ready to set up this property, so we can
3348 create syntax tables. */
3349 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3350
93da5fff 3351 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3352
5ebaddf5 3353 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3354
aa7b08b4
RS
3355 /* Control characters should not be whitespace. */
3356 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3357 for (i = 0; i <= ' ' - 1; i++)
3358 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3359 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3360
3361 /* Except that a few really are whitespace. */
3362 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3363 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3364 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3365 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3366 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3367 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3368
93da5fff 3369 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3370 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3371 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3372 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3373 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3374 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3375 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3376
3377 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3378 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3379
3380 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3381 Fcons (make_number (Sopen), make_number (')')));
3382 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3383 Fcons (make_number (Sclose), make_number ('(')));
3384 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3385 Fcons (make_number (Sopen), make_number (']')));
3386 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3387 Fcons (make_number (Sclose), make_number ('[')));
3388 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3389 Fcons (make_number (Sopen), make_number ('}')));
3390 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3391 Fcons (make_number (Sclose), make_number ('{')));
3392 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3393 Fcons (make_number ((int) Sstring), Qnil));
3394 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3395 Fcons (make_number ((int) Sescape), Qnil));
3396
93da5fff 3397 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3398 for (i = 0; i < 10; i++)
78f9a1f7
KH
3399 {
3400 c = "_-+*/&|<>="[i];
3401 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3402 }
8489eb67 3403
93da5fff 3404 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3405 for (i = 0; i < 12; i++)
78f9a1f7
KH
3406 {
3407 c = ".,;:?!#@~^'`"[i];
3408 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3409 }
bd25db08
KH
3410
3411 /* All multibyte characters have syntax `word' by default. */
3412 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
5c7b02ab 3413 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3414}
3415
dfcf069d 3416void
8489eb67
RS
3417syms_of_syntax ()
3418{
d67b4f80 3419 Qsyntax_table_p = intern_c_string ("syntax-table-p");
8489eb67
RS
3420 staticpro (&Qsyntax_table_p);
3421
93da5fff
KH
3422 staticpro (&Vsyntax_code_object);
3423
a2994c46
KS
3424 staticpro (&gl_state.object);
3425 staticpro (&gl_state.global_code);
3426 staticpro (&gl_state.current_syntax_table);
3427 staticpro (&gl_state.old_prop);
3428
3429 /* Defined in regex.c */
3430 staticpro (&re_match_object);
3431
d67b4f80 3432 Qscan_error = intern_c_string ("scan-error");
7bf5e9e4
RS
3433 staticpro (&Qscan_error);
3434 Fput (Qscan_error, Qerror_conditions,
d67b4f80 3435 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
7bf5e9e4 3436 Fput (Qscan_error, Qerror_message,
d67b4f80 3437 make_pure_c_string ("Scan error"));
7bf5e9e4 3438
8489eb67 3439 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3440 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3441
195d1361 3442 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3443 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3444Otherwise, that text property is simply ignored.
3445See the info node `(elisp)Syntax Properties' for a description of the
3446`syntax-table' property. */);
195d1361 3447
8489eb67
RS
3448 words_include_escapes = 0;
3449 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3450 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3451
bd25db08 3452 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3453 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3454 multibyte_syntax_as_symbol = 0;
3455
f4ed767f
GM
3456 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3457 &open_paren_in_column_0_is_defun_start,
b222e415 3458 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3459 open_paren_in_column_0_is_defun_start = 1;
3460
8f924df7
KH
3461
3462 DEFVAR_LISP ("find-word-boundary-function-table",
3463 &Vfind_word_boundary_function_table,
869bb237 3464 doc: /*
8f924df7 3465Char table of functions to search for the word boundary.
869bb237
KH
3466Each function is called with two arguments; POS and LIMIT.
3467POS and LIMIT are character positions in the current buffer.
3468
3469If POS is less than LIMIT, POS is at the first character of a word,
3470and the return value of a function is a position after the last
3471character of that word.
3472
3473If POS is not less than LIMIT, POS is at the last character of a word,
3474and the return value of a function is a position at the first
3475character of that word.
3476
3477In both cases, LIMIT bounds the search. */);
8f924df7 3478 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3479
8489eb67
RS
3480 defsubr (&Ssyntax_table_p);
3481 defsubr (&Ssyntax_table);
3482 defsubr (&Sstandard_syntax_table);
3483 defsubr (&Scopy_syntax_table);
3484 defsubr (&Sset_syntax_table);
3485 defsubr (&Schar_syntax);
beefa22e 3486 defsubr (&Smatching_paren);
c65adb44 3487 defsubr (&Sstring_to_syntax);
8489eb67 3488 defsubr (&Smodify_syntax_entry);
62abe9cb 3489 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3490
3491 defsubr (&Sforward_word);
3492
195d1361
RS
3493 defsubr (&Sskip_chars_forward);
3494 defsubr (&Sskip_chars_backward);
3495 defsubr (&Sskip_syntax_forward);
3496 defsubr (&Sskip_syntax_backward);
3497
b3cfe0c8 3498 defsubr (&Sforward_comment);
8489eb67
RS
3499 defsubr (&Sscan_lists);
3500 defsubr (&Sscan_sexps);
3501 defsubr (&Sbackward_prefix_chars);
3502 defsubr (&Sparse_partial_sexp);
3503}
839966f3
KH
3504
3505/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3506 (do not change this comment) */