Cleanups for src/callproc.c.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
429ab54e 2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
114f9c96 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
429ab54e 4 Free Software Foundation, Inc.
8489eb67
RS
5
6This file is part of GNU Emacs.
7
9ec0b715 8GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 9it under the terms of the GNU General Public License as published by
9ec0b715
GM
10the Free Software Foundation, either version 3 of the License, or
11(at your option) any later version.
8489eb67
RS
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
9ec0b715 19along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67 23#include <ctype.h>
d7306fe6 24#include <setjmp.h>
8489eb67
RS
25#include "lisp.h"
26#include "commands.h"
27#include "buffer.h"
5c7b02ab 28#include "character.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361
RS
36#include "intervals.h"
37
38/* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
41
42#define ST_COMMENT_STYLE (256 + 1)
43#define ST_STRING_STYLE (256 + 2)
93da5fff 44#include "category.h"
8489eb67 45
7bf5e9e4 46Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
47
48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
bd25db08
KH
51/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52int multibyte_syntax_as_symbol;
53
8ea151b2
RS
54/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57Lisp_Object syntax_temp;
58
f4ed767f
GM
59/* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
62
63int open_paren_in_column_0_is_defun_start;
64
e5d4f4dc
RS
65/* This is the internal form of the parse state used in parse-partial-sexp. */
66
67struct lisp_parse_state
68 {
6449674e
SM
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
71 int incomment; /* -1 if in unnestable comment else comment nesting */
72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int mindepth; /* Minimum depth seen while scanning. */
75 /* Char number of most recent start-of-expression at current level */
76 EMACS_INT thislevelstart;
77 /* Char number of start of containing expression */
78 EMACS_INT prevlevelstart;
79 EMACS_INT location; /* Char number at which parsing stopped. */
80 EMACS_INT comstr_start; /* Position of last comment/string starter. */
81 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
82 of levels (starting from outermost). */
e5d4f4dc
RS
83 };
84\f
37bef230
RS
85/* These variables are a cache for finding the start of a defun.
86 find_start_pos is the place for which the defun start was found.
87 find_start_value is the defun start position found for it.
6a140a74 88 find_start_value_byte is the corresponding byte position.
37bef230
RS
89 find_start_buffer is the buffer it was found in.
90 find_start_begv is the BEGV value when it was found.
91 find_start_modiff is the value of MODIFF when it was found. */
92
8e2911c2
AS
93static EMACS_INT find_start_pos;
94static EMACS_INT find_start_value;
95static EMACS_INT find_start_value_byte;
37bef230 96static struct buffer *find_start_buffer;
8e2911c2 97static EMACS_INT find_start_begv;
37bef230 98static int find_start_modiff;
6a140a74
RS
99
100
f57e2426
J
101static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
102static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
103static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
104static void scan_sexps_forward (struct lisp_parse_state *,
105 EMACS_INT, EMACS_INT, EMACS_INT, int,
106 int, Lisp_Object, int);
107static int in_classes (int, Lisp_Object);
195d1361
RS
108\f
109
110struct gl_state_s gl_state; /* Global state of syntax parser. */
111
971de7fb 112INTERVAL interval_of (int, Lisp_Object);
195d1361
RS
113#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
114 to scan to property-change. */
115
6a140a74
RS
116/* Update gl_state to an appropriate interval which contains CHARPOS. The
117 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 118 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 119 valid at start, the rest is filled basing on OBJECT.
195d1361 120
6a140a74 121 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
122 direction than the intervals - or in an interval. We update the
123 current syntax-table basing on the property of this interval, and
6a140a74 124 update the interval to start further than CHARPOS - or be
195d1361 125 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 126 charpos to call this subroutine again - or be before/after the
195d1361
RS
127 start/end of OBJECT. */
128
129void
971de7fb 130update_syntax_table (int charpos, int count, int init, Lisp_Object object)
195d1361
RS
131{
132 Lisp_Object tmp_table;
4ffe723b 133 int cnt = 0, invalidate = 1;
5a774522 134 INTERVAL i;
195d1361
RS
135
136 if (init)
137 {
bb0de084 138 gl_state.old_prop = Qnil;
195d1361
RS
139 gl_state.start = gl_state.b_property;
140 gl_state.stop = gl_state.e_property;
bb0de084
SM
141 i = interval_of (charpos, object);
142 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
143 invalidate = 0;
144 if (NULL_INTERVAL_P (i))
145 return;
f902a008 146 /* interval_of updates only ->position of the return value, so
d80f4cc7 147 update the parents manually to speed up update_interval. */
bb0de084 148 while (!NULL_PARENT (i))
d80f4cc7
RS
149 {
150 if (AM_RIGHT_CHILD (i))
439d5cb4 151 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 152 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
153 - TOTAL_LENGTH (INTERVAL_PARENT (i))
154 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 155 else
439d5cb4 156 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 157 + TOTAL_LENGTH (i);
439d5cb4 158 i = INTERVAL_PARENT (i);
d80f4cc7
RS
159 }
160 i = gl_state.forward_i;
bb0de084 161 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 162 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
163 goto update;
164 }
5a774522 165 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 166
423e705d 167 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 168 or further off. */
195d1361
RS
169 if (NULL_INTERVAL_P (i))
170 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 171 else if (charpos < i->position) /* Move left. */
195d1361
RS
172 {
173 if (count > 0)
f902a008 174 error ("Error in syntax_table logic for intervals <-");
195d1361 175 /* Update the interval. */
6a140a74 176 i = update_interval (i, charpos);
bb0de084 177 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
178 {
179 invalidate = 0;
195d1361 180 gl_state.forward_i = i;
ee0cdb48 181 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 182 }
423e705d 183 }
6a140a74 184 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
185 {
186 if (count < 0)
f902a008 187 error ("Error in syntax_table logic for intervals ->");
195d1361 188 /* Update the interval. */
6a140a74 189 i = update_interval (i, charpos);
bb0de084 190 if (i->position != gl_state.e_property)
195d1361
RS
191 {
192 invalidate = 0;
195d1361 193 gl_state.backward_i = i;
bb0de084 194 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
195 }
196 }
195d1361
RS
197
198 update:
199 tmp_table = textget (i->plist, Qsyntax_table);
200
201 if (invalidate)
202 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 203
423e705d
SM
204 if (invalidate) /* Did not get to adjacent interval. */
205 { /* with the same table => */
206 /* invalidate the old range. */
195d1361
RS
207 if (count > 0)
208 {
209 gl_state.backward_i = i;
bb0de084
SM
210 gl_state.b_property = i->position - gl_state.offset;
211 }
212 else
195d1361 213 {
bb0de084 214 gl_state.forward_i = i;
ee0cdb48 215 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
216 }
217 }
37bef230 218
bb0de084 219 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 220 {
bb0de084
SM
221 gl_state.current_syntax_table = tmp_table;
222 gl_state.old_prop = tmp_table;
223 if (EQ (Fsyntax_table_p (tmp_table), Qt))
224 {
225 gl_state.use_global = 0;
7d0393cf 226 }
bb0de084
SM
227 else if (CONSP (tmp_table))
228 {
229 gl_state.use_global = 1;
230 gl_state.global_code = tmp_table;
231 }
7d0393cf 232 else
bb0de084
SM
233 {
234 gl_state.use_global = 0;
235 gl_state.current_syntax_table = current_buffer->syntax_table;
236 }
195d1361
RS
237 }
238
239 while (!NULL_INTERVAL_P (i))
240 {
241 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
242 {
243 if (count > 0)
bb0de084
SM
244 {
245 gl_state.e_property = i->position - gl_state.offset;
246 gl_state.forward_i = i;
247 }
248 else
249 {
5a774522
SM
250 gl_state.b_property
251 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
252 gl_state.backward_i = i;
253 }
254 return;
195d1361 255 }
7d0393cf 256 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
257 {
258 if (count > 0)
bb0de084 259 {
5a774522
SM
260 gl_state.e_property
261 = i->position + LENGTH (i) - gl_state.offset
262 /* e_property at EOB is not set to ZV but to ZV+1, so that
263 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
264 having to check eob between the two. */
265 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
266 gl_state.forward_i = i;
267 }
268 else
269 {
270 gl_state.b_property = i->position - gl_state.offset;
271 gl_state.backward_i = i;
272 }
273 return;
195d1361
RS
274 }
275 cnt++;
276 i = count > 0 ? next_interval (i) : previous_interval (i);
277 }
bb0de084
SM
278 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
279 if (count > 0)
280 gl_state.e_property = gl_state.stop;
281 else
282 gl_state.b_property = gl_state.start;
195d1361
RS
283}
284\f
6a140a74
RS
285/* Returns TRUE if char at CHARPOS is quoted.
286 Global syntax-table data should be set up already to be good at CHARPOS
287 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
288
289static int
6449674e 290char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
195d1361
RS
291{
292 register enum syntaxcode code;
6449674e 293 register EMACS_INT beg = BEGV;
195d1361 294 register int quoted = 0;
6449674e 295 EMACS_INT orig = charpos;
6a140a74 296
02d8b017 297 while (charpos > beg)
195d1361 298 {
ab229fdd 299 int c;
02d8b017 300 DEC_BOTH (charpos, bytepos);
ab229fdd 301
6a140a74 302 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 303 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 304 code = SYNTAX (c);
6a140a74
RS
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
6a140a74 308 quoted = !quoted;
195d1361 309 }
6a140a74
RS
310
311 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
312 return quoted;
313}
6a140a74
RS
314
315/* Return the bytepos one character after BYTEPOS.
316 We assume that BYTEPOS is not at the end of the buffer. */
317
6449674e 318INLINE EMACS_INT
971de7fb 319inc_bytepos (EMACS_INT bytepos)
6a140a74 320{
ef316cf0
RS
321 if (NILP (current_buffer->enable_multibyte_characters))
322 return bytepos + 1;
323
6a140a74
RS
324 INC_POS (bytepos);
325 return bytepos;
326}
327
328/* Return the bytepos one character before BYTEPOS.
329 We assume that BYTEPOS is not at the start of the buffer. */
330
6449674e 331INLINE EMACS_INT
971de7fb 332dec_bytepos (EMACS_INT bytepos)
6a140a74 333{
ef316cf0
RS
334 if (NILP (current_buffer->enable_multibyte_characters))
335 return bytepos - 1;
336
6a140a74
RS
337 DEC_POS (bytepos);
338 return bytepos;
339}
195d1361 340\f
2b34df4e 341/* Return a defun-start position before POS and not too far before.
7cc80f0a
RS
342 It should be the last one before POS, or nearly the last.
343
344 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 345 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
346
347 We record the information about where the scan started
348 and what its result was, so that another call in the same area
349 can return the same value very quickly.
195d1361
RS
350
351 There is no promise at which position the global syntax data is
352 valid on return from the subroutine, so the caller should explicitly
353 update the global data. */
37bef230 354
6449674e 355static EMACS_INT
971de7fb 356find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
37bef230 357{
8e2911c2 358 EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
37bef230 359
1fd1cc2f
RS
360 if (!open_paren_in_column_0_is_defun_start)
361 {
362 find_start_value_byte = BEGV_BYTE;
363 return BEGV;
364 }
365
37bef230
RS
366 /* Use previous finding, if it's valid and applies to this inquiry. */
367 if (current_buffer == find_start_buffer
368 /* Reuse the defun-start even if POS is a little farther on.
369 POS might be in the next defun, but that's ok.
370 Our value may not be the best possible, but will still be usable. */
371 && pos <= find_start_pos + 1000
372 && pos >= find_start_value
373 && BEGV == find_start_begv
374 && MODIFF == find_start_modiff)
375 return find_start_value;
376
377 /* Back up to start of line. */
6a140a74 378 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 379
195d1361
RS
380 /* We optimize syntax-table lookup for rare updates. Thus we accept
381 only those `^\s(' which are good in global _and_ text-property
382 syntax-tables. */
d48cd3f4 383 SETUP_BUFFER_SYNTAX_TABLE ();
1fd1cc2f 384 while (PT > BEGV)
37bef230 385 {
ab229fdd
AS
386 int c;
387
1fd1cc2f
RS
388 /* Open-paren at start of line means we may have found our
389 defun-start. */
327719ee 390 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 391 if (SYNTAX (c) == Sopen)
195d1361 392 {
1fd1cc2f 393 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 394 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 395 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
396 break;
397 /* Now fallback to the default value. */
d48cd3f4 398 SETUP_BUFFER_SYNTAX_TABLE ();
195d1361 399 }
1fd1cc2f
RS
400 /* Move to beg of previous line. */
401 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
402 }
403
404 /* Record what we found, for the next try. */
6a140a74
RS
405 find_start_value = PT;
406 find_start_value_byte = PT_BYTE;
37bef230
RS
407 find_start_buffer = current_buffer;
408 find_start_modiff = MODIFF;
409 find_start_begv = BEGV;
410 find_start_pos = pos;
411
6a140a74
RS
412 TEMP_SET_PT_BOTH (opoint, opoint_byte);
413
37bef230
RS
414 return find_start_value;
415}
416\f
f902a008
RS
417/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
418
419static int
971de7fb 420prev_char_comend_first (int pos, int pos_byte)
f902a008
RS
421{
422 int c, val;
423
424 DEC_BOTH (pos, pos_byte);
425 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
426 c = FETCH_CHAR (pos_byte);
427 val = SYNTAX_COMEND_FIRST (c);
428 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
429 return val;
430}
431
432/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
433
3f679f55
SM
434/* static int
435 * prev_char_comstart_first (pos, pos_byte)
436 * int pos, pos_byte;
437 * {
438 * int c, val;
7d0393cf 439 *
3f679f55
SM
440 * DEC_BOTH (pos, pos_byte);
441 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
442 * c = FETCH_CHAR (pos_byte);
443 * val = SYNTAX_COMSTART_FIRST (c);
444 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
445 * return val;
446 * } */
f902a008 447
6a140a74
RS
448/* Checks whether charpos FROM is at the end of a comment.
449 FROM_BYTE is the bytepos corresponding to FROM.
450 Do not move back before STOP.
451
452 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
453 return -1 otherwise.
454
455 If successful, store the charpos of the comment's beginning
456 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
457
458 Global syntax data remains valid for backward search starting at
459 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
460
461static int
971de7fb 462back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
195d1361
RS
463{
464 /* Look back, counting the parity of string-quotes,
465 and recording the comment-starters seen.
466 When we reach a safe place, assume that's not in a string;
467 then step the main scan to the earliest comment-starter seen
468 an even number of string quotes away from the safe place.
469
470 OFROM[I] is position of the earliest comment-starter seen
471 which is I+2X quotes from the comment-end.
472 PARITY is current parity of quotes from the comment end. */
3ee5041c 473 int string_style = -1; /* Presumed outside of any string. */
195d1361 474 int string_lossage = 0;
f7c436c1 475 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 476 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
477 comment-starter we might see later could be a false positive (hidden
478 inside another comment).
479 Test case: { a (* b } c (* d *) */
480 int comment_lossage = 0;
8e2911c2
AS
481 EMACS_INT comment_end = from;
482 EMACS_INT comment_end_byte = from_byte;
483 EMACS_INT comstart_pos = 0;
484 EMACS_INT comstart_byte;
eb35b628
RS
485 /* Place where the containing defun starts,
486 or 0 if we didn't come across it yet. */
8e2911c2
AS
487 EMACS_INT defun_start = 0;
488 EMACS_INT defun_start_byte = 0;
195d1361 489 register enum syntaxcode code;
95ff8dfc 490 int nesting = 1; /* current comment nesting */
ea315ed6 491 int c;
3f679f55
SM
492 int syntax = 0;
493
494 /* FIXME: A }} comment-ender style leads to incorrect behavior
495 in the case of {{ c }}} because we ignore the last two chars which are
496 assumed to be comment-enders although they aren't. */
195d1361
RS
497
498 /* At beginning of range to scan, we're outside of strings;
499 that determines quote parity to the comment-end. */
500 while (from != stop)
501 {
3f679f55
SM
502 int temp_byte, prev_syntax;
503 int com2start, com2end;
6a140a74 504
195d1361 505 /* Move back and examine a character. */
6a140a74 506 DEC_BOTH (from, from_byte);
195d1361
RS
507 UPDATE_SYNTAX_TABLE_BACKWARD (from);
508
3f679f55 509 prev_syntax = syntax;
b7dbcc19 510 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 511 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
512 code = SYNTAX (c);
513
3f679f55
SM
514 /* Check for 2-char comment markers. */
515 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
516 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
517 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
518 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
519 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
520 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
521 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
522
523 /* Nasty cases with overlapping 2-char comment markers:
524 - snmp-mode: -- c -- foo -- c --
525 --- c --
526 ------ c --
527 - c-mode: *||*
528 |* *|* *|
529 |*| |* |*|
530 /// */
531
532 /* If a 2-char comment sequence partly overlaps with another,
533 we don't try to be clever. */
534 if (from > stop && (com2end || com2start))
195d1361 535 {
3f679f55
SM
536 int next = from, next_byte = from_byte, next_c, next_syntax;
537 DEC_BOTH (next, next_byte);
538 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 539 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55
SM
540 next_syntax = SYNTAX_WITH_FLAGS (next_c);
541 if (((com2start || comnested)
542 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
543 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
544 || ((com2end || comnested)
545 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
546 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
547 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
548 goto lossage;
549 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 550 }
3f679f55
SM
551
552 if (com2start && comstart_pos == 0)
553 /* We're looking at a comment starter. But it might be a comment
554 ender as well (see snmp-mode). The first time we see one, we
555 need to consider it as a comment starter,
556 and the subsequent times as a comment ender. */
557 com2end = 0;
558
559 /* Turn a 2-char comment sequences into the appropriate syntax. */
560 if (com2end)
561 code = Sendcomment;
562 else if (com2start)
563 code = Scomment;
564 /* Ignore comment starters of a different style. */
565 else if (code == Scomment
566 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
567 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 568 continue;
195d1361 569
9828a477
KH
570 /* Ignore escaped characters, except comment-enders. */
571 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
572 continue;
573
3ee5041c 574 switch (code)
195d1361 575 {
3ee5041c
SM
576 case Sstring_fence:
577 case Scomment_fence:
578 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
579 case Sstring:
580 /* Track parity of quotes. */
581 if (string_style == -1)
582 /* Entering a string. */
583 string_style = c;
584 else if (string_style == c)
585 /* Leaving the string. */
586 string_style = -1;
587 else
588 /* If we have two kinds of string delimiters.
589 There's no way to grok this scanning backwards. */
195d1361 590 string_lossage = 1;
3ee5041c 591 break;
7d0393cf 592
3ee5041c
SM
593 case Scomment:
594 /* We've already checked that it is the relevant comstyle. */
f7c436c1 595 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
596 /* There are odd string quotes involved, so let's be careful.
597 Test case in Pascal: " { " a { " } */
598 goto lossage;
599
f7c436c1
SM
600 if (!comnested)
601 {
602 /* Record best comment-starter so far. */
603 comstart_pos = from;
604 comstart_byte = from_byte;
605 }
606 else if (--nesting <= 0)
95ff8dfc
RS
607 /* nested comments have to be balanced, so we don't need to
608 keep looking for earlier ones. We use here the same (slightly
609 incorrect) reasoning as below: since it is followed by uniform
610 paired string quotes, this comment-start has to be outside of
611 strings, else the comment-end itself would be inside a string. */
612 goto done;
3ee5041c
SM
613 break;
614
02010917 615 case Sendcomment:
3f679f55 616 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 617 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 618 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
619 /* This is the same style of comment ender as ours. */
620 {
621 if (comnested)
622 nesting++;
623 else
624 /* Anything before that can't count because it would match
625 this comment-ender rather than ours. */
626 from = stop; /* Break out of the loop. */
627 }
f7c436c1
SM
628 else if (comstart_pos != 0 || c != '\n')
629 /* We're mixing comment styles here, so we'd better be careful.
630 The (comstart_pos != 0 || c != '\n') check is not quite correct
631 (we should just always set comment_lossage), but removing it
632 would imply that any multiline comment in C would go through
633 lossage, which seems overkill.
634 The failure should only happen in the rare cases such as
635 { (* } *) */
636 comment_lossage = 1;
02010917 637 break;
195d1361 638
02010917
SM
639 case Sopen:
640 /* Assume a defun-start point is outside of strings. */
641 if (open_paren_in_column_0_is_defun_start
642 && (from == stop
643 || (temp_byte = dec_bytepos (from_byte),
644 FETCH_CHAR (temp_byte) == '\n')))
645 {
646 defun_start = from;
647 defun_start_byte = from_byte;
648 from = stop; /* Break out of the loop. */
649 }
eb35b628 650 break;
02010917
SM
651
652 default:
f7c436c1 653 break;
eb35b628 654 }
195d1361
RS
655 }
656
657 if (comstart_pos == 0)
658 {
659 from = comment_end;
6a140a74 660 from_byte = comment_end_byte;
195d1361
RS
661 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
662 }
f7c436c1
SM
663 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
664 or `done'), then we've found the beginning of the non-nested comment. */
665 else if (1) /* !comnested */
195d1361
RS
666 {
667 from = comstart_pos;
6a140a74 668 from_byte = comstart_byte;
789f3320 669 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
670 }
671 else
672 {
3ee5041c
SM
673 struct lisp_parse_state state;
674 lossage:
195d1361
RS
675 /* We had two kinds of string delimiters mixed up
676 together. Decode this going forwards.
02010917 677 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
678 to the one in question; this records where we
679 last passed a comment starter. */
eb35b628
RS
680 /* If we did not already find the defun start, find it now. */
681 if (defun_start == 0)
682 {
683 defun_start = find_defun_start (comment_end, comment_end_byte);
684 defun_start_byte = find_start_value_byte;
685 }
02010917 686 do
195d1361 687 {
02010917
SM
688 scan_sexps_forward (&state,
689 defun_start, defun_start_byte,
690 comment_end, -10000, 0, Qnil, 0);
691 defun_start = comment_end;
692 if (state.incomment == (comnested ? 1 : -1)
693 && state.comstyle == comstyle)
694 from = state.comstr_start;
695 else
696 {
697 from = comment_end;
698 if (state.incomment)
699 /* If comment_end is inside some other comment, maybe ours
700 is nested, so we need to try again from within the
701 surrounding comment. Example: { a (* " *) */
702 {
703 /* FIXME: We should advance by one or two chars. */
704 defun_start = state.comstr_start + 2;
705 defun_start_byte = CHAR_TO_BYTE (defun_start);
706 }
707 }
708 } while (defun_start < comment_end);
709
6a140a74 710 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
711 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
712 }
7d0393cf 713
95ff8dfc 714 done:
6a140a74
RS
715 *charpos_ptr = from;
716 *bytepos_ptr = from_byte;
717
1aa963c8 718 return (from == comment_end) ? -1 : from;
195d1361
RS
719}
720\f
8489eb67 721DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
722 doc: /* Return t if OBJECT is a syntax table.
723Currently, any char-table counts as a syntax table. */)
5842a27b 724 (Lisp_Object object)
8489eb67 725{
2203e1e8 726 if (CHAR_TABLE_P (object)
e704cb4b 727 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
728 return Qt;
729 return Qnil;
730}
731
8ea151b2 732static void
971de7fb 733check_syntax_table (Lisp_Object obj)
8489eb67 734{
47f5f6ae
KS
735 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
736 Qsyntax_table_p, obj);
7d0393cf 737}
8489eb67 738
8489eb67 739DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
740 doc: /* Return the current syntax table.
741This is the one specified by the current buffer. */)
5842a27b 742 (void)
8489eb67
RS
743{
744 return current_buffer->syntax_table;
745}
746
747DEFUN ("standard-syntax-table", Fstandard_syntax_table,
748 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
749 doc: /* Return the standard syntax table.
750This is the one used for new buffers. */)
5842a27b 751 (void)
8489eb67
RS
752{
753 return Vstandard_syntax_table;
754}
755
756DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
757 doc: /* Construct a new syntax table and return it.
758It is a copy of the TABLE, which defaults to the standard syntax table. */)
5842a27b 759 (Lisp_Object table)
8489eb67 760{
8ea151b2
RS
761 Lisp_Object copy;
762
265a9e55 763 if (!NILP (table))
8ea151b2
RS
764 check_syntax_table (table);
765 else
766 table = Vstandard_syntax_table;
767
768 copy = Fcopy_sequence (table);
0f867324
RS
769
770 /* Only the standard syntax table should have a default element.
771 Other syntax tables should inherit from parents instead. */
772 XCHAR_TABLE (copy)->defalt = Qnil;
773
774 /* Copied syntax tables should all have parents.
775 If we copied one with no parent, such as the standard syntax table,
776 use the standard syntax table as the copy's parent. */
777 if (NILP (XCHAR_TABLE (copy)->parent))
778 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 779 return copy;
8489eb67
RS
780}
781
782DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
783 doc: /* Select a new syntax table for the current buffer.
784One argument, a syntax table. */)
5842a27b 785 (Lisp_Object table)
8489eb67 786{
4e4dfa78 787 int idx;
8ea151b2 788 check_syntax_table (table);
8489eb67
RS
789 current_buffer->syntax_table = table;
790 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
791 idx = PER_BUFFER_VAR_IDX (syntax_table);
792 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
793 return table;
794}
795\f
796/* Convert a letter which signifies a syntax code
797 into the code it signifies.
195d1361 798 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
799
800unsigned char syntax_spec_code[0400] =
801 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
802 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
803 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
804 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 805 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
806 (char) Smath, 0377, 0377, (char) Squote,
807 (char) Sopen, (char) Sclose, 0377, 0377,
808 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
809 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
810 0377, 0377, 0377, 0377,
811 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 812 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
813 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
814 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
815 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
816 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
817 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
818 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 819 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
820 };
821
195d1361 822/* Indexed by syntax code, give the letter that describes it. */
8489eb67 823
195d1361 824char syntax_code_spec[16] =
8489eb67 825 {
195d1361
RS
826 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
827 '!', '|'
8489eb67 828 };
93da5fff
KH
829
830/* Indexed by syntax code, give the object (cons of syntax code and
831 nil) to be stored in syntax table. Since these objects can be
832 shared among syntax tables, we generate them in advance. By
833 sharing objects, the function `describe-syntax' can give a more
834 compact listing. */
835static Lisp_Object Vsyntax_code_object;
836
8489eb67
RS
837\f
838DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93 839 doc: /* Return the syntax code of CHARACTER, described by a character.
e2b6daf4
CY
840For example, if CHARACTER is a word constituent, the
841character `w' (119) is returned.
fdb82f93
PJ
842The characters that correspond to various syntax codes
843are listed in the documentation of `modify-syntax-entry'. */)
5842a27b 844 (Lisp_Object character)
8489eb67 845{
8ea151b2 846 int char_int;
774b9a60 847 CHECK_CHARACTER (character);
2203e1e8 848 char_int = XINT (character);
d48cd3f4 849 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 850 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
851}
852
853DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93 854 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
5842a27b 855 (Lisp_Object character)
beefa22e 856{
8ea151b2 857 int char_int, code;
b7826503 858 CHECK_NUMBER (character);
2203e1e8 859 char_int = XINT (character);
d48cd3f4 860 SETUP_BUFFER_SYNTAX_TABLE ();
8ea151b2 861 code = SYNTAX (char_int);
a8bd7cd8 862 if (code == Sopen || code == Sclose)
2e34157c 863 return SYNTAX_MATCH (char_int);
beefa22e 864 return Qnil;
8489eb67
RS
865}
866
c65adb44 867DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
868 doc: /* Convert a syntax specification STRING into syntax cell form.
869STRING should be a string as it is allowed as argument of
870`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 871\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93 872text property. */)
5842a27b 873 (Lisp_Object string)
8489eb67 874{
2e567bd3 875 register const unsigned char *p;
8489eb67 876 register enum syntaxcode code;
247e20a8 877 int val;
8ea151b2 878 Lisp_Object match;
8489eb67 879
b7826503 880 CHECK_STRING (string);
8489eb67 881
d5db4077 882 p = SDATA (string);
8489eb67
RS
883 code = (enum syntaxcode) syntax_spec_code[*p++];
884 if (((int) code & 0377) == 0377)
b764a653 885 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 886
8ea151b2 887 if (code == Sinherit)
c65adb44 888 return Qnil;
8ea151b2
RS
889
890 if (*p)
d1be9f0f 891 {
93da5fff 892 int len;
62a6e103 893 int character = STRING_CHAR_AND_LENGTH (p, len);
93da5fff 894 XSETINT (match, character);
d1be9f0f
RS
895 if (XFASTINT (match) == ' ')
896 match = Qnil;
93da5fff 897 p += len;
d1be9f0f
RS
898 }
899 else
8ea151b2 900 match = Qnil;
8489eb67 901
8ea151b2 902 val = (int) code;
8489eb67
RS
903 while (*p)
904 switch (*p++)
905 {
906 case '1':
247e20a8 907 val |= 1 << 16;
8489eb67
RS
908 break;
909
910 case '2':
247e20a8 911 val |= 1 << 17;
8489eb67
RS
912 break;
913
914 case '3':
247e20a8 915 val |= 1 << 18;
8489eb67
RS
916 break;
917
918 case '4':
247e20a8 919 val |= 1 << 19;
8489eb67
RS
920 break;
921
922 case 'p':
247e20a8 923 val |= 1 << 20;
8489eb67 924 break;
e5d4f4dc
RS
925
926 case 'b':
247e20a8 927 val |= 1 << 21;
e5d4f4dc 928 break;
95ff8dfc
RS
929
930 case 'n':
931 val |= 1 << 22;
932 break;
8489eb67 933 }
7d0393cf 934
93da5fff 935 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 936 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
937 else
938 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
939 return Fcons (make_number (val), match);
940}
941
fdb82f93 942/* I really don't know why this is interactive
2c9e1900 943 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 944DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 945 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 946 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 947The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 948 the current buffer's syntax table.
5c7b02ab 949CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
fedc6ab5 950in the range MIN to MAX are changed.
fdb82f93
PJ
951The first character of NEWENTRY should be one of the following:
952 Space or - whitespace syntax. w word constituent.
953 _ symbol constituent. . punctuation.
954 ( open-parenthesis. ) close-parenthesis.
955 " string quote. \\ escape.
956 $ paired delimiter. ' expression quote or prefix operator.
957 < comment starter. > comment ender.
958 / character-quote. @ inherit from `standard-syntax-table'.
959 | generic string fence. ! generic comment fence.
960
961Only single-character comment start and end sequences are represented thus.
962Two-character sequences are represented as described below.
963The second character of NEWENTRY is the matching parenthesis,
964 used only if the first character is `(' or `)'.
965Any additional characters are flags.
966Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
967 1 means CHAR is the start of a two-char comment start sequence.
968 2 means CHAR is the second character of such a sequence.
969 3 means CHAR is the start of a two-char comment end sequence.
970 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
971
972There can be up to two orthogonal comment sequences. This is to support
973language modes such as C++. By default, all comment sequences are of style
974a, but you can set the comment sequence style to b (on the second character
975of a comment-start, or the first character of a comment-end sequence) using
976this flag:
a50a10a0
PJ
977 b means CHAR is part of comment sequence b.
978 n means CHAR is part of a nestable comment sequence.
fdb82f93 979
a50a10a0 980 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 981 such characters are treated as whitespace when they occur
a50a10a0 982 between expressions.
fedc6ab5 983usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
5842a27b 984 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
c65adb44 985{
5c7b02ab
KH
986 if (CONSP (c))
987 {
8f924df7
KH
988 CHECK_CHARACTER_CAR (c);
989 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
990 }
991 else
992 CHECK_CHARACTER (c);
c65adb44
SM
993
994 if (NILP (syntax_table))
995 syntax_table = current_buffer->syntax_table;
996 else
997 check_syntax_table (syntax_table);
8489eb67 998
5c7b02ab
KH
999 newentry = Fstring_to_syntax (newentry);
1000 if (CONSP (c))
1001 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1002 else
1003 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1004
1005 /* We clear the regexp cache, since character classes can now have
1006 different values from those in the compiled regexps.*/
1007 clear_regexp_cache ();
1008
8489eb67
RS
1009 return Qnil;
1010}
1011\f
1012/* Dump syntax table to buffer in human-readable format */
1013
62abe9cb
SM
1014DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1015 Sinternal_describe_syntax_value, 1, 1, 0,
1016 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
5842a27b 1017 (Lisp_Object syntax)
8489eb67
RS
1018{
1019 register enum syntaxcode code;
e6365855 1020 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1021 char str[2];
62abe9cb 1022 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1023
8ea151b2
RS
1024 if (NILP (value))
1025 {
62abe9cb
SM
1026 insert_string ("default");
1027 return syntax;
8ea151b2
RS
1028 }
1029
908b7fea
KH
1030 if (CHAR_TABLE_P (value))
1031 {
62abe9cb
SM
1032 insert_string ("deeper char-table ...");
1033 return syntax;
908b7fea
KH
1034 }
1035
8ea151b2
RS
1036 if (!CONSP (value))
1037 {
62abe9cb
SM
1038 insert_string ("invalid");
1039 return syntax;
8ea151b2
RS
1040 }
1041
c1d497be
KR
1042 first = XCAR (value);
1043 match_lisp = XCDR (value);
8ea151b2
RS
1044
1045 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1046 {
62abe9cb
SM
1047 insert_string ("invalid");
1048 return syntax;
8489eb67
RS
1049 }
1050
e704cb4b 1051 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1052 start1 = (XINT (first) >> 16) & 1;
1053 start2 = (XINT (first) >> 17) & 1;
1054 end1 = (XINT (first) >> 18) & 1;
1055 end2 = (XINT (first) >> 19) & 1;
1056 prefix = (XINT (first) >> 20) & 1;
1057 comstyle = (XINT (first) >> 21) & 1;
e6365855 1058 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1059
1060 if ((int) code < 0 || (int) code >= (int) Smax)
1061 {
1062 insert_string ("invalid");
62abe9cb 1063 return syntax;
8489eb67
RS
1064 }
1065 desc = syntax_code_spec[(int) code];
1066
1067 str[0] = desc, str[1] = 0;
1068 insert (str, 1);
1069
93da5fff
KH
1070 if (NILP (match_lisp))
1071 insert (" ", 1);
1072 else
1073 insert_char (XINT (match_lisp));
8489eb67 1074
8489eb67
RS
1075 if (start1)
1076 insert ("1", 1);
1077 if (start2)
1078 insert ("2", 1);
1079
1080 if (end1)
1081 insert ("3", 1);
1082 if (end2)
1083 insert ("4", 1);
1084
1085 if (prefix)
1086 insert ("p", 1);
e5d4f4dc
RS
1087 if (comstyle)
1088 insert ("b", 1);
e6365855
SM
1089 if (comnested)
1090 insert ("n", 1);
8489eb67
RS
1091
1092 insert_string ("\twhich means: ");
1093
0220c518 1094 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1095 {
1096 case Swhitespace:
1097 insert_string ("whitespace"); break;
1098 case Spunct:
1099 insert_string ("punctuation"); break;
1100 case Sword:
1101 insert_string ("word"); break;
1102 case Ssymbol:
1103 insert_string ("symbol"); break;
1104 case Sopen:
1105 insert_string ("open"); break;
1106 case Sclose:
1107 insert_string ("close"); break;
1108 case Squote:
d671382e 1109 insert_string ("prefix"); break;
8489eb67
RS
1110 case Sstring:
1111 insert_string ("string"); break;
1112 case Smath:
1113 insert_string ("math"); break;
1114 case Sescape:
1115 insert_string ("escape"); break;
1116 case Scharquote:
1117 insert_string ("charquote"); break;
1118 case Scomment:
1119 insert_string ("comment"); break;
1120 case Sendcomment:
1121 insert_string ("endcomment"); break;
d671382e
SM
1122 case Sinherit:
1123 insert_string ("inherit"); break;
1124 case Scomment_fence:
1125 insert_string ("comment fence"); break;
1126 case Sstring_fence:
1127 insert_string ("string fence"); break;
8489eb67
RS
1128 default:
1129 insert_string ("invalid");
62abe9cb 1130 return syntax;
8489eb67
RS
1131 }
1132
8ea151b2 1133 if (!NILP (match_lisp))
8489eb67
RS
1134 {
1135 insert_string (", matches ");
8ea151b2 1136 insert_char (XINT (match_lisp));
8489eb67
RS
1137 }
1138
1139 if (start1)
1140 insert_string (",\n\t is the first character of a comment-start sequence");
1141 if (start2)
1142 insert_string (",\n\t is the second character of a comment-start sequence");
1143
1144 if (end1)
1145 insert_string (",\n\t is the first character of a comment-end sequence");
1146 if (end2)
1147 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1148 if (comstyle)
1149 insert_string (" (comment style b)");
e6365855
SM
1150 if (comnested)
1151 insert_string (" (nestable)");
e5d4f4dc 1152
8489eb67
RS
1153 if (prefix)
1154 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1155
62abe9cb
SM
1156 return syntax;
1157}
8489eb67 1158\f
195d1361
RS
1159int parse_sexp_ignore_comments;
1160
8f924df7
KH
1161/* Char-table of functions that find the next or previous word
1162 boundary. */
1163Lisp_Object Vfind_word_boundary_function_table;
869bb237 1164
8489eb67
RS
1165/* Return the position across COUNT words from FROM.
1166 If that many words cannot be found before the end of the buffer, return 0.
1167 COUNT negative means scan backward and stop at word beginning. */
1168
dfcf069d 1169int
971de7fb 1170scan_words (register int from, register int count)
8489eb67
RS
1171{
1172 register int beg = BEGV;
1173 register int end = ZV;
6a140a74 1174 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1175 register enum syntaxcode code;
1176 int ch0, ch1;
8f924df7 1177 Lisp_Object func, script, pos;
8489eb67
RS
1178
1179 immediate_quit = 1;
1180 QUIT;
1181
195d1361
RS
1182 SETUP_SYNTAX_TABLE (from, count);
1183
8489eb67
RS
1184 while (count > 0)
1185 {
1186 while (1)
1187 {
1188 if (from == end)
1189 {
1190 immediate_quit = 0;
1191 return 0;
1192 }
195d1361 1193 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1194 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1195 code = SYNTAX (ch0);
6a140a74 1196 INC_BOTH (from, from_byte);
8489eb67
RS
1197 if (words_include_escapes
1198 && (code == Sescape || code == Scharquote))
1199 break;
1200 if (code == Sword)
1201 break;
8489eb67 1202 }
93da5fff
KH
1203 /* Now CH0 is a character which begins a word and FROM is the
1204 position of the next character. */
8f924df7 1205 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1206 if (! NILP (Ffboundp (func)))
8489eb67 1207 {
869bb237 1208 pos = call2 (func, make_number (from - 1), make_number (end));
8f924df7
KH
1209 if (INTEGERP (pos) && XINT (pos) > from)
1210 {
1211 from = XINT (pos);
1212 from_byte = CHAR_TO_BYTE (from);
1213 }
8489eb67 1214 }
869bb237 1215 else
8cb8232a 1216 {
8cb8232a
KH
1217 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1218 while (1)
1219 {
1220 if (from == end) break;
1221 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1222 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1223 code = SYNTAX (ch1);
1224 if ((code != Sword
1225 && (! words_include_escapes
1226 || (code != Sescape && code != Scharquote)))
2b70a11c 1227 || word_boundary_p (ch0, ch1))
869bb237 1228 break;
8cb8232a
KH
1229 INC_BOTH (from, from_byte);
1230 ch0 = ch1;
1231 }
8489eb67
RS
1232 }
1233 count--;
1234 }
1235 while (count < 0)
1236 {
1237 while (1)
1238 {
1239 if (from == beg)
1240 {
1241 immediate_quit = 0;
1242 return 0;
1243 }
6a140a74 1244 DEC_BOTH (from, from_byte);
195d1361 1245 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1246 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1247 code = SYNTAX (ch1);
8489eb67
RS
1248 if (words_include_escapes
1249 && (code == Sescape || code == Scharquote))
1250 break;
1251 if (code == Sword)
1252 break;
8489eb67 1253 }
93da5fff
KH
1254 /* Now CH1 is a character which ends a word and FROM is the
1255 position of it. */
8f924df7 1256 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1257 if (! NILP (Ffboundp (func)))
8f924df7 1258 {
869bb237 1259 pos = call2 (func, make_number (from), make_number (beg));
8f924df7
KH
1260 if (INTEGERP (pos) && XINT (pos) < from)
1261 {
1262 from = XINT (pos);
1263 from_byte = CHAR_TO_BYTE (from);
1264 }
8489eb67 1265 }
869bb237 1266 else
8489eb67 1267 {
8cb8232a
KH
1268 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1269 while (1)
1270 {
8cb8232a
KH
1271 if (from == beg)
1272 break;
262be72a 1273 DEC_BOTH (from, from_byte);
8cb8232a 1274 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1275 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1276 code = SYNTAX (ch0);
1277 if ((code != Sword
1278 && (! words_include_escapes
1279 || (code != Sescape && code != Scharquote)))
2b70a11c 1280 || word_boundary_p (ch0, ch1))
262be72a
MB
1281 {
1282 INC_BOTH (from, from_byte);
1283 break;
1284 }
8cb8232a
KH
1285 ch1 = ch0;
1286 }
8489eb67
RS
1287 }
1288 count++;
1289 }
1290
1291 immediate_quit = 0;
1292
1293 return from;
1294}
1295
1e9dbb5f 1296DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93
PJ
1297 doc: /* Move point forward ARG words (backward if ARG is negative).
1298Normally returns t.
1299If an edge of the buffer or a field boundary is reached, point is left there
1300and the function returns nil. Field boundaries are not noticed if
1301`inhibit-field-text-motion' is non-nil. */)
5842a27b 1302 (Lisp_Object arg)
8489eb67 1303{
c096ae4d 1304 Lisp_Object tmp;
2c6ea900 1305 int orig_val, val;
8489eb67 1306
839966f3
KH
1307 if (NILP (arg))
1308 XSETFASTINT (arg, 1);
1309 else
1310 CHECK_NUMBER (arg);
1311
1312 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1313 if (! orig_val)
839966f3 1314 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1315
b8855607 1316 /* Avoid jumping out of an input field. */
c096ae4d
SM
1317 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1318 Qt, Qnil, Qnil);
1319 val = XFASTINT (tmp);
7d0393cf 1320
8489eb67 1321 SET_PT (val);
e6d8341f 1322 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1323}
1324\f
dd4c5104 1325Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
195d1361
RS
1326
1327DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1328 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1329STRING is like the inside of a `[...]' in a regular expression
1330except that `]' is never special and `\\' quotes `^', `-' or `\\'
7087d5e9 1331 (but not at the end of a range; quoting is never needed there).
fdb82f93
PJ
1332Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1333With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1334Char classes, e.g. `[:alpha:]', are supported.
1335
1336Returns the distance traveled, either zero or positive. */)
5842a27b 1337 (Lisp_Object string, Lisp_Object lim)
195d1361 1338{
327719ee 1339 return skip_chars (1, string, lim, 1);
195d1361
RS
1340}
1341
1342DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1343 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1344See `skip-chars-forward' for details.
1345Returns the distance traveled, either zero or negative. */)
5842a27b 1346 (Lisp_Object string, Lisp_Object lim)
195d1361 1347{
327719ee 1348 return skip_chars (0, string, lim, 1);
195d1361
RS
1349}
1350
1351DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1352 doc: /* Move point forward across chars in specified syntax classes.
1353SYNTAX is a string of syntax code characters.
1354Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1355If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1356This function returns the distance traveled, either zero or positive. */)
5842a27b 1357 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1358{
b7dbcc19 1359 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1360}
1361
1362DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1363 doc: /* Move point backward across chars in specified syntax classes.
1364SYNTAX is a string of syntax code characters.
1365Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1366If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1367This function returns the distance traveled, either zero or negative. */)
5842a27b 1368 (Lisp_Object syntax, Lisp_Object lim)
195d1361 1369{
b7dbcc19 1370 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1371}
1372
6a140a74 1373static Lisp_Object
971de7fb 1374skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
195d1361 1375{
195d1361
RS
1376 register unsigned int c;
1377 unsigned char fastmap[0400];
b7dbcc19 1378 /* Store the ranges of non-ASCII characters. */
5df4982e 1379 int *char_ranges;
7e68b0ea 1380 int n_char_ranges = 0;
195d1361 1381 int negate = 0;
4101e6fe 1382 register int i, i_byte;
b7dbcc19
KH
1383 /* Set to 1 if the current buffer is multibyte and the region
1384 contains non-ASCII chars. */
1385 int multibyte;
1386 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1387 chars. */
1674d9a2
RS
1388 int string_multibyte;
1389 int size_byte;
2e567bd3 1390 const unsigned char *str;
82d497fc 1391 int len;
a1bc88d4 1392 Lisp_Object iso_classes;
195d1361 1393
b7826503 1394 CHECK_STRING (string);
a1bc88d4 1395 iso_classes = Qnil;
82d497fc 1396
195d1361
RS
1397 if (NILP (lim))
1398 XSETINT (lim, forwardp ? ZV : BEGV);
1399 else
b7826503 1400 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1401
1402 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1403 if (XINT (lim) > ZV)
1404 XSETFASTINT (lim, ZV);
1405 if (XINT (lim) < BEGV)
1406 XSETFASTINT (lim, BEGV);
1407
b7dbcc19 1408 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
92eaa22e 1409 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1410 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1411
72af86bd 1412 memset (fastmap, 0, sizeof fastmap);
195d1361 1413
8f924df7
KH
1414 str = SDATA (string);
1415 size_byte = SBYTES (string);
4101e6fe 1416
82d497fc 1417 i_byte = 0;
13090112 1418 if (i_byte < size_byte
d5db4077 1419 && SREF (string, 0) == '^')
195d1361 1420 {
82d497fc 1421 negate = 1; i_byte++;
195d1361
RS
1422 }
1423
1424 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1425 Handle backslashes and ranges specially.
195d1361 1426
b7dbcc19
KH
1427 If STRING contains non-ASCII characters, setup char_ranges for
1428 them and use fastmap only for their leading codes. */
4101e6fe 1429
b7dbcc19 1430 if (! string_multibyte)
195d1361 1431 {
b7dbcc19 1432 int string_has_eight_bit = 0;
4101e6fe 1433
b7dbcc19
KH
1434 /* At first setup fastmap. */
1435 while (i_byte < size_byte)
1436 {
1437 c = str[i_byte++];
1438
a1bc88d4
RS
1439 if (handle_iso_classes && c == '['
1440 && i_byte < size_byte
327719ee 1441 && str[i_byte] == ':')
a1bc88d4
RS
1442 {
1443 const unsigned char *class_beg = str + i_byte + 1;
1444 const unsigned char *class_end = class_beg;
b3bda4fd 1445 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1446 /* Leave room for the null. */
a1bc88d4
RS
1447 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1448 re_wctype_t cc;
1449
1450 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1451 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1452
b3bda4fd
KS
1453 while (class_end < class_limit
1454 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1455 class_end++;
1456
b3bda4fd
KS
1457 if (class_end == class_beg
1458 || *class_end != ':' || class_end[1] != ']')
1459 goto not_a_class_name;
a1bc88d4 1460
72af86bd 1461 memcpy (class_name, class_beg, class_end - class_beg);
a1bc88d4
RS
1462 class_name[class_end - class_beg] = 0;
1463
1464 cc = re_wctype (class_name);
1465 if (cc == 0)
1466 error ("Invalid ISO C character class");
1467
1468 iso_classes = Fcons (make_number (cc), iso_classes);
1469
1470 i_byte = class_end + 2 - str;
1471 continue;
1472 }
1473
b3bda4fd 1474 not_a_class_name:
b7dbcc19
KH
1475 if (c == '\\')
1476 {
13090112 1477 if (i_byte == size_byte)
4101e6fe
RS
1478 break;
1479
5c7b02ab 1480 c = str[i_byte++];
195d1361 1481 }
839966f3
KH
1482 /* Treat `-' as range character only if another character
1483 follows. */
1484 if (i_byte + 1 < size_byte
82d497fc 1485 && str[i_byte] == '-')
195d1361 1486 {
9690d026 1487 unsigned int c2;
4101e6fe
RS
1488
1489 /* Skip over the dash. */
82d497fc 1490 i_byte++;
4101e6fe 1491
4101e6fe 1492 /* Get the end of the range. */
5c7b02ab 1493 c2 = str[i_byte++];
b7dbcc19
KH
1494 if (c2 == '\\'
1495 && i_byte < size_byte)
1496 c2 = str[i_byte++];
1497
8f924df7
KH
1498 if (c <= c2)
1499 {
1500 while (c <= c2)
1501 fastmap[c++] = 1;
1502 if (! ASCII_CHAR_P (c2))
1503 string_has_eight_bit = 1;
1504 }
195d1361
RS
1505 }
1506 else
b7dbcc19
KH
1507 {
1508 fastmap[c] = 1;
1509 if (! ASCII_CHAR_P (c))
1510 string_has_eight_bit = 1;
1511 }
1512 }
1513
1514 /* If the current range is multibyte and STRING contains
1515 eight-bit chars, arrange fastmap and setup char_ranges for
1516 the corresponding multibyte chars. */
1517 if (multibyte && string_has_eight_bit)
1518 {
1519 unsigned char fastmap2[0400];
1520 int range_start_byte, range_start_char;
1521
72af86bd
AS
1522 memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1523 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1524 /* We are sure that this loop stops. */
1525 for (i = 0200; ! fastmap2[i]; i++);
4c0354d7 1526 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1527 fastmap[CHAR_LEADING_CODE (c)] = 1;
1528 range_start_byte = i;
1529 range_start_char = c;
f003ca54 1530 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
b7dbcc19
KH
1531 for (i = 129; i < 0400; i++)
1532 {
4c0354d7 1533 c = BYTE8_TO_CHAR (i);
b7dbcc19
KH
1534 fastmap[CHAR_LEADING_CODE (c)] = 1;
1535 if (i - range_start_byte != c - range_start_char)
1536 {
1537 char_ranges[n_char_ranges++] = range_start_char;
1538 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1539 + range_start_char);
1540 range_start_byte = i;
1541 range_start_char = c;
8f924df7 1542 }
b7dbcc19
KH
1543 }
1544 char_ranges[n_char_ranges++] = range_start_char;
1545 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1546 + range_start_char);
195d1361
RS
1547 }
1548 }
f003ca54 1549 else /* STRING is multibyte */
b7dbcc19 1550 {
f003ca54
KH
1551 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1552
b7dbcc19 1553 while (i_byte < size_byte)
195d1361 1554 {
b7dbcc19 1555 unsigned char leading_code;
5c7b02ab 1556
b7dbcc19 1557 leading_code = str[i_byte];
62a6e103 1558 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19 1559 i_byte += len;
5c7b02ab 1560
327719ee
MB
1561 if (handle_iso_classes && c == '['
1562 && i_byte < size_byte
62a6e103 1563 && STRING_CHAR (str + i_byte) == ':')
327719ee
MB
1564 {
1565 const unsigned char *class_beg = str + i_byte + 1;
1566 const unsigned char *class_end = class_beg;
1567 const unsigned char *class_limit = str + size_byte - 2;
1568 /* Leave room for the null. */
1569 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1570 re_wctype_t cc;
1571
1572 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1573 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1574
1575 while (class_end < class_limit
1576 && *class_end >= 'a' && *class_end <= 'z')
1577 class_end++;
1578
1579 if (class_end == class_beg
1580 || *class_end != ':' || class_end[1] != ']')
1581 goto not_a_class_name_multibyte;
1582
72af86bd 1583 memcpy (class_name, class_beg, class_end - class_beg);
327719ee
MB
1584 class_name[class_end - class_beg] = 0;
1585
1586 cc = re_wctype (class_name);
1587 if (cc == 0)
1588 error ("Invalid ISO C character class");
1589
1590 iso_classes = Fcons (make_number (cc), iso_classes);
1591
1592 i_byte = class_end + 2 - str;
1593 continue;
1594 }
1595
1596 not_a_class_name_multibyte:
195d1361
RS
1597 if (c == '\\')
1598 {
13090112 1599 if (i_byte == size_byte)
4101e6fe
RS
1600 break;
1601
b7dbcc19 1602 leading_code = str[i_byte];
62a6e103 1603 c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1604 i_byte += len;
195d1361 1605 }
839966f3
KH
1606 /* Treat `-' as range character only if another character
1607 follows. */
1608 if (i_byte + 1 < size_byte
82d497fc 1609 && str[i_byte] == '-')
195d1361 1610 {
9690d026 1611 unsigned int c2;
b7dbcc19 1612 unsigned char leading_code2;
4101e6fe
RS
1613
1614 /* Skip over the dash. */
82d497fc 1615 i_byte++;
4101e6fe 1616
4101e6fe 1617 /* Get the end of the range. */
b7dbcc19 1618 leading_code2 = str[i_byte];
62a6e103 1619 c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
82d497fc 1620 i_byte += len;
7e68b0ea 1621
b7dbcc19
KH
1622 if (c2 == '\\'
1623 && i_byte < size_byte)
1624 {
1625 leading_code2 = str[i_byte];
62a6e103 1626 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
b7dbcc19
KH
1627 i_byte += len;
1628 }
1629
f003ca54
KH
1630 if (c > c2)
1631 continue;
b7dbcc19 1632 if (ASCII_CHAR_P (c))
06274af5 1633 {
5c7b02ab
KH
1634 while (c <= c2 && c < 0x80)
1635 fastmap[c++] = 1;
b7dbcc19
KH
1636 leading_code = CHAR_LEADING_CODE (c);
1637 }
1638 if (! ASCII_CHAR_P (c))
1639 {
1640 while (leading_code <= leading_code2)
1641 fastmap[leading_code++] = 1;
1642 if (c <= c2)
e39091b6 1643 {
b7dbcc19 1644 char_ranges[n_char_ranges++] = c;
e39091b6 1645 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1646 }
1647 }
195d1361
RS
1648 }
1649 else
7e68b0ea 1650 {
b7dbcc19 1651 if (ASCII_CHAR_P (c))
e39091b6
KH
1652 fastmap[c] = 1;
1653 else
7e68b0ea 1654 {
b7dbcc19 1655 fastmap[leading_code] = 1;
4101e6fe
RS
1656 char_ranges[n_char_ranges++] = c;
1657 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1658 }
1659 }
195d1361 1660 }
b7dbcc19
KH
1661
1662 /* If the current range is unibyte and STRING contains non-ASCII
1663 chars, arrange fastmap for the corresponding unibyte
1664 chars. */
1665
1666 if (! multibyte && n_char_ranges > 0)
1667 {
72af86bd 1668 memset (fastmap + 0200, 0, 0200);
b7dbcc19
KH
1669 for (i = 0; i < n_char_ranges; i += 2)
1670 {
1671 int c1 = char_ranges[i];
1672 int c2 = char_ranges[i + 1];
1673
1674 for (; c1 <= c2; c1++)
2afc21f5
SM
1675 {
1676 int b = CHAR_TO_BYTE_SAFE (c1);
1677 if (b >= 0)
1678 fastmap[b] = 1;
1679 }
b7dbcc19
KH
1680 }
1681 }
195d1361
RS
1682 }
1683
9690d026 1684 /* If ^ was the first character, complement the fastmap. */
195d1361 1685 if (negate)
b7dbcc19
KH
1686 {
1687 if (! multibyte)
1688 for (i = 0; i < sizeof fastmap; i++)
1689 fastmap[i] ^= 1;
1690 else
1691 {
1692 for (i = 0; i < 0200; i++)
1693 fastmap[i] ^= 1;
1694 /* All non-ASCII chars possibly match. */
1695 for (; i < sizeof fastmap; i++)
1696 fastmap[i] = 1;
1697 }
1698 }
195d1361
RS
1699
1700 {
1701 int start_point = PT;
1702 int pos = PT;
6a140a74 1703 int pos_byte = PT_BYTE;
9af7511a
KH
1704 unsigned char *p = PT_ADDR, *endp, *stop;
1705
1706 if (forwardp)
1707 {
1708 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1709 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1710 }
1711 else
1712 {
1713 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1714 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1715 }
195d1361
RS
1716
1717 immediate_quit = 1;
618db430
SM
1718 /* This code may look up syntax tables using macros that rely on the
1719 gl_state object. To make sure this object is not out of date,
1720 let's initialize it manually.
1721 We ignore syntax-table text-properties for now, since that's
1722 what we've done in the past. */
d48cd3f4 1723 SETUP_BUFFER_SYNTAX_TABLE ();
b7dbcc19 1724 if (forwardp)
195d1361 1725 {
b7dbcc19 1726 if (multibyte)
8f924df7 1727 while (1)
b7dbcc19 1728 {
8f924df7 1729 int nbytes;
9af7511a 1730
8f924df7 1731 if (p >= stop)
9af7511a 1732 {
8f924df7 1733 if (p >= endp)
9af7511a 1734 break;
8f924df7
KH
1735 p = GAP_END_ADDR;
1736 stop = endp;
9af7511a 1737 }
62a6e103 1738 c = STRING_CHAR_AND_LENGTH (p, nbytes);
327719ee 1739 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1740 {
327719ee 1741 if (negate)
9af7511a 1742 break;
327719ee
MB
1743 else
1744 goto fwd_ok;
9af7511a 1745 }
9af7511a 1746
8f924df7 1747 if (! fastmap[*p])
b7dbcc19
KH
1748 break;
1749 if (! ASCII_CHAR_P (c))
9af7511a 1750 {
b7dbcc19
KH
1751 /* As we are looking at a multibyte character, we
1752 must look up the character in the table
1753 CHAR_RANGES. If there's no data in the table,
1754 that character is not what we want to skip. */
1755
1756 /* The following code do the right thing even if
1757 n_char_ranges is zero (i.e. no data in
1758 CHAR_RANGES). */
1759 for (i = 0; i < n_char_ranges; i += 2)
1760 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1761 break;
b7dbcc19
KH
1762 if (!(negate ^ (i < n_char_ranges)))
1763 break;
9af7511a 1764 }
327719ee 1765 fwd_ok:
8f924df7 1766 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1767 }
1768 else
8f924df7
KH
1769 while (1)
1770 {
1771 if (p >= stop)
9af7511a 1772 {
8f924df7 1773 if (p >= endp)
9af7511a 1774 break;
8f924df7
KH
1775 p = GAP_END_ADDR;
1776 stop = endp;
9af7511a 1777 }
327719ee
MB
1778
1779 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1780 {
1781 if (negate)
1782 break;
1783 else
1784 goto fwd_unibyte_ok;
1785 }
1786
8f924df7
KH
1787 if (!fastmap[*p])
1788 break;
327719ee 1789 fwd_unibyte_ok:
8f924df7
KH
1790 p++, pos++, pos_byte++;
1791 }
195d1361
RS
1792 }
1793 else
1794 {
b7dbcc19 1795 if (multibyte)
8f924df7 1796 while (1)
b7dbcc19 1797 {
8f924df7 1798 unsigned char *prev_p;
9af7511a 1799
8f924df7 1800 if (p <= stop)
9af7511a 1801 {
8f924df7 1802 if (p <= endp)
9af7511a 1803 break;
8f924df7
KH
1804 p = GPT_ADDR;
1805 stop = endp;
9af7511a 1806 }
8f924df7
KH
1807 prev_p = p;
1808 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 1809 c = STRING_CHAR (p);
a1bc88d4 1810
327719ee
MB
1811 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1812 {
1813 if (negate)
1814 break;
9690d026 1815 else
327719ee 1816 goto back_ok;
7e68b0ea 1817 }
a1bc88d4 1818
8f924df7 1819 if (! fastmap[*p])
b7dbcc19
KH
1820 break;
1821 if (! ASCII_CHAR_P (c))
7e68b0ea 1822 {
b7dbcc19
KH
1823 /* See the comment in the previous similar code. */
1824 for (i = 0; i < n_char_ranges; i += 2)
1825 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1826 break;
1827 if (!(negate ^ (i < n_char_ranges)))
1828 break;
7e68b0ea 1829 }
327719ee 1830 back_ok:
8f924df7 1831 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1832 }
1833 else
8f924df7
KH
1834 while (1)
1835 {
1836 if (p <= stop)
9af7511a 1837 {
8f924df7 1838 if (p <= endp)
9af7511a 1839 break;
8f924df7
KH
1840 p = GPT_ADDR;
1841 stop = endp;
9af7511a 1842 }
e39091b6 1843
327719ee
MB
1844 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1845 {
1846 if (negate)
1847 break;
9af7511a 1848 else
327719ee
MB
1849 goto back_unibyte_ok;
1850 }
a1bc88d4 1851
8f924df7
KH
1852 if (!fastmap[p[-1]])
1853 break;
327719ee 1854 back_unibyte_ok:
8f924df7
KH
1855 p--, pos--, pos_byte--;
1856 }
195d1361 1857 }
7e68b0ea 1858
b7dbcc19
KH
1859 SET_PT_BOTH (pos, pos_byte);
1860 immediate_quit = 0;
1861
1862 return make_number (PT - start_point);
1863 }
1864}
7e68b0ea 1865
b7dbcc19
KH
1866
1867static Lisp_Object
971de7fb 1868skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
b7dbcc19
KH
1869{
1870 register unsigned int c;
1871 unsigned char fastmap[0400];
1872 int negate = 0;
1873 register int i, i_byte;
1874 int multibyte;
1875 int size_byte;
1876 unsigned char *str;
1877
1878 CHECK_STRING (string);
1879
1880 if (NILP (lim))
1881 XSETINT (lim, forwardp ? ZV : BEGV);
1882 else
1883 CHECK_NUMBER_COERCE_MARKER (lim);
1884
1885 /* In any case, don't allow scan outside bounds of buffer. */
1886 if (XINT (lim) > ZV)
1887 XSETFASTINT (lim, ZV);
1888 if (XINT (lim) < BEGV)
1889 XSETFASTINT (lim, BEGV);
1890
ade8ee9e 1891 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 1892 return make_number (0);
8c6e735b 1893
b7dbcc19 1894 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
92eaa22e 1895 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19 1896
72af86bd 1897 memset (fastmap, 0, sizeof fastmap);
b7dbcc19 1898
8f924df7
KH
1899 if (SBYTES (string) > SCHARS (string))
1900 /* As this is very rare case (syntax spec is ASCII only), don't
1901 consider efficiency. */
bc796a59
KH
1902 string = string_make_unibyte (string);
1903
8f924df7
KH
1904 str = SDATA (string);
1905 size_byte = SBYTES (string);
bc796a59 1906
b7dbcc19
KH
1907 i_byte = 0;
1908 if (i_byte < size_byte
8f924df7 1909 && SREF (string, 0) == '^')
b7dbcc19
KH
1910 {
1911 negate = 1; i_byte++;
1912 }
1913
b7dbcc19
KH
1914 /* Find the syntaxes specified and set their elements of fastmap. */
1915
1916 while (i_byte < size_byte)
1917 {
1918 c = str[i_byte++];
8f924df7 1919 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 1920 }
195d1361 1921
9690d026 1922 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1923 if (negate)
1924 for (i = 0; i < sizeof fastmap; i++)
9690d026 1925 fastmap[i] ^= 1;
195d1361
RS
1926
1927 {
1928 int start_point = PT;
1929 int pos = PT;
6a140a74 1930 int pos_byte = PT_BYTE;
8f924df7
KH
1931 unsigned char *p = PT_ADDR, *endp, *stop;
1932
1933 if (forwardp)
1934 {
1935 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1936 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1937 }
1938 else
1939 {
1940 endp = CHAR_POS_ADDR (XINT (lim));
1941 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1942 }
195d1361
RS
1943
1944 immediate_quit = 1;
b7dbcc19
KH
1945 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1946 if (forwardp)
195d1361 1947 {
b7dbcc19 1948 if (multibyte)
195d1361 1949 {
8f924df7 1950 while (1)
8c6e735b 1951 {
8f924df7
KH
1952 int nbytes;
1953
1954 if (p >= stop)
1955 {
1956 if (p >= endp)
1957 break;
1958 p = GAP_END_ADDR;
1959 stop = endp;
1960 }
62a6e103 1961 c = STRING_CHAR_AND_LENGTH (p, nbytes);
8f924df7 1962 if (! fastmap[(int) SYNTAX (c)])
8c6e735b 1963 break;
8f924df7 1964 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
1965 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1966 }
195d1361
RS
1967 }
1968 else
1969 {
8c6e735b 1970 while (1)
7e68b0ea 1971 {
8f924df7
KH
1972 if (p >= stop)
1973 {
1974 if (p >= endp)
1975 break;
1976 p = GAP_END_ADDR;
1977 stop = endp;
1978 }
1979 if (! fastmap[(int) SYNTAX (*p)])
8c6e735b 1980 break;
8f924df7 1981 p++, pos++, pos_byte++;
b7dbcc19 1982 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
1983 }
1984 }
1985 }
1986 else
1987 {
b7dbcc19 1988 if (multibyte)
195d1361 1989 {
8c6e735b 1990 while (1)
b7dbcc19 1991 {
8f924df7
KH
1992 unsigned char *prev_p;
1993
1994 if (p <= stop)
b7dbcc19 1995 {
8f924df7
KH
1996 if (p <= endp)
1997 break;
1998 p = GPT_ADDR;
1999 stop = endp;
b7dbcc19 2000 }
262be72a 2001 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2002 prev_p = p;
2003 while (--p >= stop && ! CHAR_HEAD_P (*p));
62a6e103 2004 c = STRING_CHAR (p);
8f924df7
KH
2005 if (! fastmap[(int) SYNTAX (c)])
2006 break;
2007 pos--, pos_byte -= prev_p - p;
b7dbcc19 2008 }
195d1361
RS
2009 }
2010 else
2011 {
8c6e735b
KH
2012 while (1)
2013 {
8f924df7
KH
2014 if (p <= stop)
2015 {
2016 if (p <= endp)
2017 break;
2018 p = GPT_ADDR;
2019 stop = endp;
2020 }
102f6132 2021 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7 2022 if (! fastmap[(int) SYNTAX (p[-1])])
8c6e735b 2023 break;
8f924df7 2024 p--, pos--, pos_byte--;
8c6e735b 2025 }
195d1361
RS
2026 }
2027 }
6a140a74
RS
2028
2029 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2030 immediate_quit = 0;
2031
2032 return make_number (PT - start_point);
2033 }
2034}
a1bc88d4
RS
2035
2036/* Return 1 if character C belongs to one of the ISO classes
2037 in the list ISO_CLASSES. Each class is represented by an
2038 integer which is its type according to re_wctype. */
2039
2040static int
971de7fb 2041in_classes (int c, Lisp_Object iso_classes)
a1bc88d4
RS
2042{
2043 int fits_class = 0;
2044
618db430 2045 while (CONSP (iso_classes))
a1bc88d4
RS
2046 {
2047 Lisp_Object elt;
2048 elt = XCAR (iso_classes);
2049 iso_classes = XCDR (iso_classes);
2050
2051 if (re_iswctype (c, XFASTINT (elt)))
2052 fits_class = 1;
2053 }
2054
2055 return fits_class;
2056}
195d1361 2057\f
95ff8dfc
RS
2058/* Jump over a comment, assuming we are at the beginning of one.
2059 FROM is the current position.
2060 FROM_BYTE is the bytepos corresponding to FROM.
2061 Do not move past STOP (a charpos).
2062 The comment over which we have to jump is of style STYLE
2063 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2064 NESTING should be positive to indicate the nesting at the beginning
2065 for nested comments and should be zero or negative else.
2066 ST_COMMENT_STYLE cannot be nested.
2067 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2068 (or 0 If the search cannot start in the middle of a two-character).
2069
2070 If successful, return 1 and store the charpos of the comment's end
2071 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2072 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2073 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2074 (as defined for state.incomment) in *INCOMMENT_PTR.
2075
2076 The comment end is the last character of the comment rather than the
2077 character just after the comment.
2078
2079 Global syntax data is assumed to initially be valid for FROM and
2080 remains valid for forward search starting at the returned position. */
2081
2082static int
dd4c5104
DN
2083forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2084 int nesting, int style, int prev_syntax,
2085 EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2086 int *incomment_ptr)
95ff8dfc
RS
2087{
2088 register int c, c1;
2089 register enum syntaxcode code;
2090 register int syntax;
2091
2092 if (nesting <= 0) nesting = -1;
2093
2094 /* Enter the loop in the middle so that we find
2095 a 2-char comment ender if we start in the middle of it. */
2096 syntax = prev_syntax;
2097 if (syntax != 0) goto forw_incomment;
2098
2099 while (1)
2100 {
2101 if (from == stop)
2102 {
2103 *incomment_ptr = nesting;
2104 *charpos_ptr = from;
2105 *bytepos_ptr = from_byte;
2106 return 0;
2107 }
8f924df7 2108 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2109 syntax = SYNTAX_WITH_FLAGS (c);
2110 code = syntax & 0xff;
2111 if (code == Sendcomment
2112 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
2113 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2114 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2115 /* we have encountered a comment end of the same style
2116 as the comment sequence which began this comment
2117 section */
2118 break;
2119 if (code == Scomment_fence
2120 && style == ST_COMMENT_STYLE)
2121 /* we have encountered a comment end of the same style
2122 as the comment sequence which began this comment
2123 section. */
2124 break;
2125 if (nesting > 0
2126 && code == Scomment
abf8a9ff 2127 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
2128 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
2129 /* we have encountered a nested comment of the same style
2130 as the comment sequence which began this comment section */
2131 nesting++;
2132 INC_BOTH (from, from_byte);
2133 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2134
95ff8dfc
RS
2135 forw_incomment:
2136 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2137 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
8f924df7 2138 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
abf8a9ff
SM
2139 SYNTAX_COMEND_SECOND (c1))
2140 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2141 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
2142 {
2143 if (--nesting <= 0)
2144 /* we have encountered a comment end of the same style
2145 as the comment sequence which began this comment
2146 section */
2147 break;
2148 else
2149 {
2150 INC_BOTH (from, from_byte);
2151 UPDATE_SYNTAX_TABLE_FORWARD (from);
2152 }
2153 }
95ff8dfc
RS
2154 if (nesting > 0
2155 && from < stop
2156 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2157 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
95ff8dfc 2158 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
2159 && SYNTAX_COMSTART_SECOND (c1))
2160 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2161 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
2162 /* we have encountered a nested comment of the same style
2163 as the comment sequence which began this comment
2164 section */
2165 {
2166 INC_BOTH (from, from_byte);
2167 UPDATE_SYNTAX_TABLE_FORWARD (from);
2168 nesting++;
2169 }
2170 }
2171 *charpos_ptr = from;
2172 *bytepos_ptr = from_byte;
2173 return 1;
2174}
2175
b3cfe0c8 2176DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2177 doc: /*
ee648c11 2178Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2179Stop scanning if we find something other than a comment or whitespace.
2180Set point to where scanning stops.
ee648c11 2181If COUNT comments are found as expected, with nothing except whitespace
fdb82f93 2182between them, return t; otherwise return nil. */)
5842a27b 2183 (Lisp_Object count)
b3cfe0c8 2184{
2312c580
SM
2185 register EMACS_INT from;
2186 EMACS_INT from_byte;
2187 register EMACS_INT stop;
8ea151b2 2188 register int c, c1;
b3cfe0c8
RS
2189 register enum syntaxcode code;
2190 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2191 int comnested = 0; /* whether the comment is nestable or not */
be720845 2192 int found;
2312c580
SM
2193 EMACS_INT count1;
2194 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2195 int dummy;
840f481c 2196
b7826503 2197 CHECK_NUMBER (count);
840f481c 2198 count1 = XINT (count);
195d1361 2199 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2200
2201 immediate_quit = 1;
2202 QUIT;
2203
2204 from = PT;
6a140a74 2205 from_byte = PT_BYTE;
b3cfe0c8 2206
195d1361 2207 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2208 while (count1 > 0)
b3cfe0c8 2209 {
04882296 2210 do
b3cfe0c8 2211 {
f902a008
RS
2212 int comstart_first;
2213
04882296
KH
2214 if (from == stop)
2215 {
ef316cf0 2216 SET_PT_BOTH (from, from_byte);
b7e6e612 2217 immediate_quit = 0;
04882296
KH
2218 return Qnil;
2219 }
b7dbcc19 2220 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
b3cfe0c8 2221 code = SYNTAX (c);
f902a008 2222 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2223 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2224 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2225 INC_BOTH (from, from_byte);
f902a008 2226 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2227 if (from < stop && comstart_first
b7dbcc19 2228 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
8ea151b2 2229 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2230 {
7d0393cf 2231 /* We have encountered a comment start sequence and we
7fc8191e 2232 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2233 the comment style this sequence begins so that later,
2234 only a comment end of the same style actually ends
7fc8191e 2235 the comment section. */
b3cfe0c8 2236 code = Scomment;
8ea151b2 2237 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2238 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2239 INC_BOTH (from, from_byte);
95ff8dfc 2240 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2241 }
04882296 2242 }
abf8a9ff 2243 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2244
da71ab91
KH
2245 if (code == Scomment_fence)
2246 comstyle = ST_COMMENT_STYLE;
2247 else if (code != Scomment)
04882296
KH
2248 {
2249 immediate_quit = 0;
6a140a74 2250 DEC_BOTH (from, from_byte);
ef316cf0 2251 SET_PT_BOTH (from, from_byte);
04882296
KH
2252 return Qnil;
2253 }
2254 /* We're at the start of a comment. */
95ff8dfc
RS
2255 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2256 &out_charpos, &out_bytepos, &dummy);
2257 from = out_charpos; from_byte = out_bytepos;
2258 if (!found)
04882296 2259 {
95ff8dfc
RS
2260 immediate_quit = 0;
2261 SET_PT_BOTH (from, from_byte);
2262 return Qnil;
b3cfe0c8 2263 }
95ff8dfc
RS
2264 INC_BOTH (from, from_byte);
2265 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2266 /* We have skipped one comment. */
840f481c 2267 count1--;
b3cfe0c8
RS
2268 }
2269
840f481c 2270 while (count1 < 0)
b3cfe0c8 2271 {
b9145dbb 2272 while (1)
b3cfe0c8 2273 {
c65adb44 2274 int quoted;
f902a008 2275
b9145dbb
RS
2276 if (from <= stop)
2277 {
6a140a74 2278 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2279 immediate_quit = 0;
2280 return Qnil;
2281 }
b3cfe0c8 2282
6a140a74 2283 DEC_BOTH (from, from_byte);
f902a008 2284 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2285 quoted = char_quoted (from, from_byte);
b7dbcc19 2286 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
b3cfe0c8
RS
2287 code = SYNTAX (c);
2288 comstyle = 0;
95ff8dfc 2289 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2290 if (code == Sendcomment)
2291 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2292 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2293 && prev_char_comend_first (from, from_byte)
89c6809a 2294 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2295 {
7fc8191e 2296 /* We must record the comment style encountered so that
b3cfe0c8 2297 later, we can match only the proper comment begin
7fc8191e 2298 sequence of the same style. */
6a140a74 2299 DEC_BOTH (from, from_byte);
f902a008
RS
2300 code = Sendcomment;
2301 /* Calling char_quoted, above, set up global syntax position
2302 at the new value of FROM. */
b7dbcc19 2303 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2304 comstyle = SYNTAX_COMMENT_STYLE (c1);
2305 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2306 }
2307
195d1361
RS
2308 if (code == Scomment_fence)
2309 {
2310 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2311 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2312
6a140a74 2313 while (1)
195d1361 2314 {
6a140a74 2315 DEC_BOTH (from, from_byte);
195d1361 2316 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2317 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2318 if (SYNTAX (c) == Scomment_fence
7d0393cf 2319 && !char_quoted (from, from_byte))
195d1361 2320 {
7d0393cf 2321 found = 1;
195d1361
RS
2322 break;
2323 }
fcdd4585
SM
2324 else if (from == stop)
2325 break;
195d1361
RS
2326 }
2327 if (found == 0)
2328 {
2329 from = ini; /* Set point to ini + 1. */
6a140a74 2330 from_byte = ini_byte;
195d1361
RS
2331 goto leave;
2332 }
39c41ad4
SM
2333 else
2334 /* We have skipped one comment. */
2335 break;
195d1361
RS
2336 }
2337 else if (code == Sendcomment)
b3cfe0c8 2338 {
95ff8dfc 2339 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2340 &out_charpos, &out_bytepos);
1aa963c8
SM
2341 if (found == -1)
2342 {
abf8a9ff
SM
2343 if (c == '\n')
2344 /* This end-of-line is not an end-of-comment.
2345 Treat it like a whitespace.
2346 CC-mode (and maybe others) relies on this behavior. */
2347 ;
2348 else
2349 {
2350 /* Failure: we should go back to the end of this
2351 not-quite-endcomment. */
2352 if (SYNTAX(c) != code)
2353 /* It was a two-char Sendcomment. */
2354 INC_BOTH (from, from_byte);
2355 goto leave;
2356 }
1aa963c8 2357 }
2b927d02 2358 else
abf8a9ff
SM
2359 {
2360 /* We have skipped one comment. */
2361 from = out_charpos, from_byte = out_bytepos;
2362 break;
2363 }
b3cfe0c8 2364 }
bb0de084 2365 else if (code != Swhitespace || quoted)
b3cfe0c8 2366 {
195d1361 2367 leave:
b3cfe0c8 2368 immediate_quit = 0;
6a140a74 2369 INC_BOTH (from, from_byte);
ef316cf0 2370 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2371 return Qnil;
2372 }
2373 }
2374
840f481c 2375 count1++;
b3cfe0c8
RS
2376 }
2377
ef316cf0 2378 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2379 immediate_quit = 0;
2380 return Qt;
2381}
2382\f
b7dbcc19 2383/* Return syntax code of character C if C is an ASCII character
db9f48c3 2384 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08 2385
b7dbcc19
KH
2386#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2387 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
bd25db08
KH
2388 ? SYNTAX (c) : Ssymbol)
2389
6a140a74 2390static Lisp_Object
971de7fb 2391scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
8489eb67
RS
2392{
2393 Lisp_Object val;
932e6895 2394 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2395 register int c, c1;
2396 int stringterm;
8489eb67
RS
2397 int quoted;
2398 int mathexit = 0;
93da5fff 2399 register enum syntaxcode code, temp_code;
195d1361 2400 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2401 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2402 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2403 EMACS_INT temp_pos;
2404 EMACS_INT last_good = from;
195d1361 2405 int found;
932e6895
SM
2406 EMACS_INT from_byte;
2407 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2408 int temp, dummy;
bd25db08 2409 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2410
2411 if (depth > 0) min_depth = 0;
2412
c3907a7e
RS
2413 if (from > ZV) from = ZV;
2414 if (from < BEGV) from = BEGV;
2415
2416 from_byte = CHAR_TO_BYTE (from);
2417
8489eb67
RS
2418 immediate_quit = 1;
2419 QUIT;
2420
195d1361 2421 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2422 while (count > 0)
2423 {
8489eb67
RS
2424 while (from < stop)
2425 {
f902a008 2426 int comstart_first, prefix;
195d1361 2427 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2428 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2429 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2430 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2431 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2432 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2433 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2434 if (depth == min_depth)
2435 last_good = from;
6a140a74 2436 INC_BOTH (from, from_byte);
195d1361 2437 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2438 if (from < stop && comstart_first
327719ee
MB
2439 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2440 SYNTAX_COMSTART_SECOND (c))
8489eb67 2441 && parse_sexp_ignore_comments)
e5d4f4dc 2442 {
7d0393cf 2443 /* we have encountered a comment start sequence and we
195d1361 2444 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2445 the comment style this sequence begins so that later,
2446 only a comment end of the same style actually ends
2447 the comment section */
2448 code = Scomment;
b7dbcc19 2449 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2450 comstyle = SYNTAX_COMMENT_STYLE (c1);
2451 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2452 INC_BOTH (from, from_byte);
f902a008 2453 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2454 }
7d0393cf 2455
f902a008 2456 if (prefix)
8489eb67
RS
2457 continue;
2458
0220c518 2459 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2460 {
2461 case Sescape:
2462 case Scharquote:
8b8bf8e6
SM
2463 if (from == stop)
2464 goto lose;
6a140a74 2465 INC_BOTH (from, from_byte);
8489eb67
RS
2466 /* treat following character as a word constituent */
2467 case Sword:
2468 case Ssymbol:
2469 if (depth || !sexpflag) break;
195d1361 2470 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2471 while (from < stop)
2472 {
195d1361 2473 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2474
2475 /* Some compilers can't handle this inside the switch. */
b7dbcc19 2476 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2477 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2478 switch (temp)
8489eb67
RS
2479 {
2480 case Scharquote:
2481 case Sescape:
6a140a74 2482 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2483 if (from == stop)
2484 goto lose;
8489eb67
RS
2485 break;
2486 case Sword:
2487 case Ssymbol:
2488 case Squote:
2489 break;
2490 default:
2491 goto done;
2492 }
6a140a74 2493 INC_BOTH (from, from_byte);
8489eb67
RS
2494 }
2495 goto done;
2496
195d1361 2497 case Scomment_fence:
95ff8dfc
RS
2498 comstyle = ST_COMMENT_STYLE;
2499 /* FALLTHROUGH */
2500 case Scomment:
8489eb67 2501 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2502 UPDATE_SYNTAX_TABLE_FORWARD (from);
2503 found = forw_comment (from, from_byte, stop,
2504 comnested, comstyle, 0,
2505 &out_charpos, &out_bytepos, &dummy);
2506 from = out_charpos, from_byte = out_bytepos;
2507 if (!found)
8489eb67 2508 {
95ff8dfc
RS
2509 if (depth == 0)
2510 goto done;
2511 goto lose;
8489eb67 2512 }
95ff8dfc
RS
2513 INC_BOTH (from, from_byte);
2514 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2515 break;
2516
2517 case Smath:
2518 if (!sexpflag)
2519 break;
b7dbcc19 2520 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2521 {
2522 INC_BOTH (from, from_byte);
2523 }
8489eb67
RS
2524 if (mathexit)
2525 {
2526 mathexit = 0;
2527 goto close1;
2528 }
2529 mathexit = 1;
2530
2531 case Sopen:
2532 if (!++depth) goto done;
2533 break;
2534
2535 case Sclose:
2536 close1:
2537 if (!--depth) goto done;
2538 if (depth < min_depth)
9471945b
KS
2539 xsignal3 (Qscan_error,
2540 build_string ("Containing expression ends prematurely"),
2541 make_number (last_good), make_number (from));
8489eb67
RS
2542 break;
2543
2544 case Sstring:
195d1361 2545 case Sstring_fence:
ef316cf0 2546 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2547 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2548 while (1)
2549 {
8b8bf8e6
SM
2550 if (from >= stop)
2551 goto lose;
195d1361 2552 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2553 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
42ebfa31
SM
2554 if (code == Sstring
2555 ? (c == stringterm
2556 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2557 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2558 break;
7c7ca3d2
RS
2559
2560 /* Some compilers can't handle this inside the switch. */
bd25db08 2561 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2562 switch (temp)
8489eb67
RS
2563 {
2564 case Scharquote:
2565 case Sescape:
6a140a74 2566 INC_BOTH (from, from_byte);
8489eb67 2567 }
6a140a74 2568 INC_BOTH (from, from_byte);
8489eb67 2569 }
6a140a74 2570 INC_BOTH (from, from_byte);
8489eb67
RS
2571 if (!depth && sexpflag) goto done;
2572 break;
240c43e8
SM
2573 default:
2574 /* Ignore whitespace, punctuation, quote, endcomment. */
2575 break;
8489eb67
RS
2576 }
2577 }
2578
2579 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2580 if (depth)
2581 goto lose;
8489eb67
RS
2582
2583 immediate_quit = 0;
2584 return Qnil;
2585
2586 /* End of object reached */
2587 done:
2588 count--;
2589 }
2590
2591 while (count < 0)
2592 {
8489eb67
RS
2593 while (from > stop)
2594 {
6a140a74 2595 DEC_BOTH (from, from_byte);
195d1361 2596 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2597 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2598 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2599 if (depth == min_depth)
2600 last_good = from;
7fc8191e 2601 comstyle = 0;
95ff8dfc 2602 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2603 if (code == Sendcomment)
2604 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2605 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2606 && prev_char_comend_first (from, from_byte)
8489eb67 2607 && parse_sexp_ignore_comments)
e5d4f4dc 2608 {
f902a008 2609 /* We must record the comment style encountered so that
e5d4f4dc 2610 later, we can match only the proper comment begin
f902a008 2611 sequence of the same style. */
6a140a74 2612 DEC_BOTH (from, from_byte);
f902a008
RS
2613 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2614 code = Sendcomment;
b7dbcc19 2615 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2616 comstyle = SYNTAX_COMMENT_STYLE (c1);
2617 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2618 }
7d0393cf 2619
9828a477 2620 /* Quoting turns anything except a comment-ender
e6365855 2621 into a word character. Note that this cannot be true
f902a008 2622 if we decremented FROM in the if-statement above. */
9828a477 2623 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2624 {
2625 DEC_BOTH (from, from_byte);
2626 code = Sword;
2627 }
9828a477 2628 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2629 continue;
2630
9828a477 2631 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2632 {
2633 case Sword:
2634 case Ssymbol:
9828a477
KH
2635 case Sescape:
2636 case Scharquote:
8489eb67 2637 if (depth || !sexpflag) break;
195d1361
RS
2638 /* This word counts as a sexp; count object finished
2639 after passing it. */
8489eb67
RS
2640 while (from > stop)
2641 {
6a140a74 2642 temp_pos = from_byte;
ef316cf0
RS
2643 if (! NILP (current_buffer->enable_multibyte_characters))
2644 DEC_POS (temp_pos);
2645 else
2646 temp_pos--;
6a140a74 2647 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2648 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2649 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2650 /* Don't allow comment-end to be quoted. */
2651 if (temp_code == Sendcomment)
2652 goto done2;
6a140a74 2653 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2654 if (quoted)
93da5fff 2655 {
6a140a74 2656 DEC_BOTH (from, from_byte);
ef316cf0 2657 temp_pos = dec_bytepos (temp_pos);
6a140a74 2658 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2659 }
b7dbcc19 2660 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2661 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2662 if (! (quoted || temp_code == Sword
2663 || temp_code == Ssymbol
2664 || temp_code == Squote))
8489eb67 2665 goto done2;
6a140a74 2666 DEC_BOTH (from, from_byte);
8489eb67
RS
2667 }
2668 goto done2;
2669
2670 case Smath:
2671 if (!sexpflag)
2672 break;
ef316cf0 2673 temp_pos = dec_bytepos (from_byte);
6a140a74 2674 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2675 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2676 DEC_BOTH (from, from_byte);
8489eb67
RS
2677 if (mathexit)
2678 {
2679 mathexit = 0;
2680 goto open2;
2681 }
2682 mathexit = 1;
2683
2684 case Sclose:
2685 if (!++depth) goto done2;
2686 break;
2687
2688 case Sopen:
2689 open2:
2690 if (!--depth) goto done2;
2691 if (depth < min_depth)
9471945b
KS
2692 xsignal3 (Qscan_error,
2693 build_string ("Containing expression ends prematurely"),
2694 make_number (last_good), make_number (from));
8489eb67
RS
2695 break;
2696
2697 case Sendcomment:
2698 if (!parse_sexp_ignore_comments)
2699 break;
95ff8dfc 2700 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2701 &out_charpos, &out_bytepos);
1aa963c8
SM
2702 /* FIXME: if found == -1, then it really wasn't a comment-end.
2703 For single-char Sendcomment, we can't do much about it apart
2704 from skipping the char.
2705 For 2-char endcomments, we could try again, taking both
2706 chars as separate entities, but it's a lot of trouble
2707 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2708 if (found != -1)
2709 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2710 break;
2711
195d1361
RS
2712 case Scomment_fence:
2713 case Sstring_fence:
2714 while (1)
2715 {
8b8bf8e6
SM
2716 if (from == stop)
2717 goto lose;
6b61353c 2718 DEC_BOTH (from, from_byte);
195d1361 2719 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2720 if (!char_quoted (from, from_byte)
b7dbcc19 2721 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
bd25db08 2722 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2723 break;
2724 }
2725 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2726 break;
7d0393cf 2727
8489eb67 2728 case Sstring:
b7dbcc19 2729 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2730 while (1)
2731 {
8b8bf8e6
SM
2732 if (from == stop)
2733 goto lose;
6b61353c
KH
2734 DEC_BOTH (from, from_byte);
2735 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2736 if (!char_quoted (from, from_byte)
2737 && (stringterm
2738 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
42ebfa31 2739 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2740 break;
8489eb67 2741 }
8489eb67
RS
2742 if (!depth && sexpflag) goto done2;
2743 break;
240c43e8
SM
2744 default:
2745 /* Ignore whitespace, punctuation, quote, endcomment. */
2746 break;
8489eb67
RS
2747 }
2748 }
2749
2750 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2751 if (depth)
2752 goto lose;
8489eb67
RS
2753
2754 immediate_quit = 0;
2755 return Qnil;
2756
2757 done2:
2758 count++;
2759 }
2760
2761
2762 immediate_quit = 0;
1e142fb7 2763 XSETFASTINT (val, from);
8489eb67
RS
2764 return val;
2765
2766 lose:
9471945b
KS
2767 xsignal3 (Qscan_error,
2768 build_string ("Unbalanced parentheses"),
2769 make_number (last_good), make_number (from));
8489eb67
RS
2770}
2771
8489eb67 2772DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2773 doc: /* Scan from character number FROM by COUNT lists.
2774Returns the character number of the position thus found.
2775
2776If DEPTH is nonzero, paren depth begins counting from that value,
2777only places where the depth in parentheses becomes zero
2778are candidates for stopping; COUNT such places are counted.
2779Thus, a positive value for DEPTH means go out levels.
2780
2781Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2782
2783If the beginning or end of (the accessible part of) the buffer is reached
2784and the depth is wrong, an error is signaled.
2785If the depth is right but the count is not used up, nil is returned. */)
5842a27b 2786 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
8489eb67 2787{
b7826503
PJ
2788 CHECK_NUMBER (from);
2789 CHECK_NUMBER (count);
2790 CHECK_NUMBER (depth);
8489eb67
RS
2791
2792 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2793}
2794
2795DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2796 doc: /* Scan from character number FROM by COUNT balanced expressions.
2797If COUNT is negative, scan backwards.
2798Returns the character number of the position thus found.
2799
2800Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2801
2802If the beginning or end of (the accessible part of) the buffer is reached
2803in the middle of a parenthetical grouping, an error is signaled.
2804If the beginning or end is reached between groupings
2805but before count is used up, nil is returned. */)
5842a27b 2806 (Lisp_Object from, Lisp_Object count)
8489eb67 2807{
b7826503
PJ
2808 CHECK_NUMBER (from);
2809 CHECK_NUMBER (count);
8489eb67
RS
2810
2811 return scan_lists (XINT (from), XINT (count), 0, 1);
2812}
2813
2814DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2815 0, 0, 0,
2816 doc: /* Move point backward over any number of chars with prefix syntax.
2817This includes chars with "quote" or "prefix" syntax (' or p). */)
5842a27b 2818 (void)
8489eb67
RS
2819{
2820 int beg = BEGV;
6a140a74
RS
2821 int opoint = PT;
2822 int opoint_byte = PT_BYTE;
6ec8bbd2 2823 int pos = PT;
6a140a74 2824 int pos_byte = PT_BYTE;
93da5fff 2825 int c;
93da5fff 2826
7d0393cf 2827 if (pos <= beg)
195d1361 2828 {
f902a008
RS
2829 SET_PT_BOTH (opoint, opoint_byte);
2830
2831 return Qnil;
195d1361 2832 }
8489eb67 2833
f902a008
RS
2834 SETUP_SYNTAX_TABLE (pos, -1);
2835
6a140a74
RS
2836 DEC_BOTH (pos, pos_byte);
2837
1fd3172d 2838 while (!char_quoted (pos, pos_byte)
195d1361 2839 /* Previous statement updates syntax table. */
b7dbcc19 2840 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2841 || SYNTAX_PREFIX (c)))
2842 {
1fd3172d
RS
2843 opoint = pos;
2844 opoint_byte = pos_byte;
2845
2846 if (pos + 1 > beg)
2847 DEC_BOTH (pos, pos_byte);
93da5fff 2848 }
8489eb67 2849
6a140a74 2850 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2851
2852 return Qnil;
2853}
2854\f
6a140a74 2855/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2856 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2857 and return a description of the state of the parse at END.
c81a3712 2858 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2859 If COMMENTSTOP is 1, stop at the start of a comment.
2860 If COMMENTSTOP is -1, stop at the start or end of a comment,
2861 after the beginning of a string, or after the end of a string. */
8489eb67 2862
340f92b5 2863static void
6a140a74 2864scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2865 stopbefore, oldstate, commentstop)
e5d4f4dc 2866 struct lisp_parse_state *stateptr;
8e2911c2
AS
2867 register EMACS_INT from;
2868 EMACS_INT from_byte, end;
2869 int targetdepth, stopbefore;
8489eb67 2870 Lisp_Object oldstate;
c81a3712 2871 int commentstop;
8489eb67
RS
2872{
2873 struct lisp_parse_state state;
2874
2875 register enum syntaxcode code;
95ff8dfc
RS
2876 int c1;
2877 int comnested;
8489eb67
RS
2878 struct level { int last, prev; };
2879 struct level levelstart[100];
2880 register struct level *curlevel = levelstart;
2881 struct level *endlevel = levelstart + 100;
8489eb67
RS
2882 register int depth; /* Paren depth of current scanning location.
2883 level - levelstart equals this except
2884 when the depth becomes negative. */
2885 int mindepth; /* Lowest DEPTH value seen. */
2886 int start_quoted = 0; /* Nonzero means starting after a char quote */
2887 Lisp_Object tem;
8e2911c2
AS
2888 EMACS_INT prev_from; /* Keep one character before FROM. */
2889 EMACS_INT prev_from_byte;
1fd3172d 2890 int prev_from_syntax;
195d1361
RS
2891 int boundary_stop = commentstop == -1;
2892 int nofence;
95ff8dfc 2893 int found;
60c86a83 2894 EMACS_INT out_bytepos, out_charpos;
7c7ca3d2 2895 int temp;
93da5fff
KH
2896
2897 prev_from = from;
6a140a74
RS
2898 prev_from_byte = from_byte;
2899 if (from != BEGV)
2900 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2901
2902 /* Use this macro instead of `from++'. */
6a140a74
RS
2903#define INC_FROM \
2904do { prev_from = from; \
2905 prev_from_byte = from_byte; \
327719ee 2906 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 2907 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2908 INC_BOTH (from, from_byte); \
3ceb4629
SM
2909 if (from < end) \
2910 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2911 } while (0)
8489eb67
RS
2912
2913 immediate_quit = 1;
2914 QUIT;
2915
265a9e55 2916 if (NILP (oldstate))
8489eb67
RS
2917 {
2918 depth = 0;
2919 state.instring = -1;
2920 state.incomment = 0;
195d1361
RS
2921 state.comstyle = 0; /* comment style a by default. */
2922 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2923 }
2924 else
2925 {
2926 tem = Fcar (oldstate);
265a9e55 2927 if (!NILP (tem))
8489eb67
RS
2928 depth = XINT (tem);
2929 else
2930 depth = 0;
2931
2932 oldstate = Fcdr (oldstate);
2933 oldstate = Fcdr (oldstate);
2934 oldstate = Fcdr (oldstate);
2935 tem = Fcar (oldstate);
195d1361 2936 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2937 state.instring = (!NILP (tem)
2938 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2939 : -1);
8489eb67
RS
2940
2941 oldstate = Fcdr (oldstate);
2942 tem = Fcar (oldstate);
e76f1c44
GM
2943 state.incomment = (!NILP (tem)
2944 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2945 : 0);
8489eb67
RS
2946
2947 oldstate = Fcdr (oldstate);
2948 tem = Fcar (oldstate);
265a9e55 2949 start_quoted = !NILP (tem);
e5d4f4dc 2950
95ff8dfc 2951 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2952 style a. If it is non-nil, we are in comment style b */
2953 oldstate = Fcdr (oldstate);
e5d4f4dc 2954 oldstate = Fcdr (oldstate);
195d1361 2955 tem = Fcar (oldstate);
7d0393cf 2956 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2957 ? ST_COMMENT_STYLE : 1);
195d1361 2958
e5d4f4dc 2959 oldstate = Fcdr (oldstate);
e5d4f4dc 2960 tem = Fcar (oldstate);
195d1361 2961 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2962 oldstate = Fcdr (oldstate);
2963 tem = Fcar (oldstate);
2964 while (!NILP (tem)) /* >= second enclosing sexps. */
2965 {
2966 /* curlevel++->last ran into compiler bug on Apollo */
2967 curlevel->last = XINT (Fcar (tem));
2968 if (++curlevel == endlevel)
02010917 2969 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2970 curlevel->prev = -1;
2971 curlevel->last = -1;
2972 tem = Fcdr (tem);
2973 }
8489eb67
RS
2974 }
2975 state.quoted = 0;
2976 mindepth = depth;
2977
2978 curlevel->prev = -1;
2979 curlevel->last = -1;
2980
326b283b 2981 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
2982 temp = FETCH_CHAR (prev_from_byte);
2983 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 2984 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 2985
195d1361 2986 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2987
326b283b
RS
2988 if (state.incomment)
2989 goto startincomment;
8489eb67
RS
2990 if (state.instring >= 0)
2991 {
195d1361 2992 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2993 if (start_quoted)
2994 goto startquotedinstring;
8489eb67
RS
2995 goto startinstring;
2996 }
326b283b
RS
2997 else if (start_quoted)
2998 goto startquoted;
1fd3172d 2999
8489eb67
RS
3000 while (from < end)
3001 {
93da5fff 3002 INC_FROM;
1fd3172d 3003 code = prev_from_syntax & 0xff;
4c920633 3004
c5d4b96f
SM
3005 if (from < end
3006 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3007 && (c1 = FETCH_CHAR (from_byte),
3008 SYNTAX_COMSTART_SECOND (c1)))
3009 /* Duplicate code to avoid a complex if-expression
3010 which causes trouble for the SGI compiler. */
95ff8dfc 3011 {
c5d4b96f
SM
3012 /* Record the comment style we have entered so that only
3013 the comment-end sequence of the same style actually
3014 terminates the comment section. */
3015 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
3016 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3017 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
3018 state.incomment = comnested ? 1 : -1;
95ff8dfc 3019 state.comstr_start = prev_from;
c5d4b96f
SM
3020 INC_FROM;
3021 code = Scomment;
95ff8dfc 3022 }
4c920633 3023 else if (code == Scomment_fence)
e5d4f4dc
RS
3024 {
3025 /* Record the comment style we have entered so that only
3026 the comment-end sequence of the same style actually
3027 terminates the comment section. */
95ff8dfc
RS
3028 state.comstyle = ST_COMMENT_STYLE;
3029 state.incomment = -1;
195d1361 3030 state.comstr_start = prev_from;
e5d4f4dc 3031 code = Scomment;
e5d4f4dc 3032 }
c5d4b96f
SM
3033 else if (code == Scomment)
3034 {
3035 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
3036 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3037 1 : -1);
3038 state.comstr_start = prev_from;
3039 }
e5d4f4dc 3040
1fd3172d 3041 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3042 continue;
0220c518 3043 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
3044 {
3045 case Sescape:
3046 case Scharquote:
3047 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3048 curlevel->last = prev_from;
8489eb67
RS
3049 startquoted:
3050 if (from == end) goto endquoted;
93da5fff 3051 INC_FROM;
8489eb67
RS
3052 goto symstarted;
3053 /* treat following character as a word constituent */
3054 case Sword:
3055 case Ssymbol:
3056 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3057 curlevel->last = prev_from;
8489eb67
RS
3058 symstarted:
3059 while (from < end)
3060 {
7c7ca3d2 3061 /* Some compilers can't handle this inside the switch. */
327719ee 3062 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
ab229fdd 3063 temp = SYNTAX (temp);
7c7ca3d2 3064 switch (temp)
8489eb67
RS
3065 {
3066 case Scharquote:
3067 case Sescape:
93da5fff 3068 INC_FROM;
8489eb67
RS
3069 if (from == end) goto endquoted;
3070 break;
3071 case Sword:
3072 case Ssymbol:
3073 case Squote:
3074 break;
3075 default:
3076 goto symdone;
3077 }
93da5fff 3078 INC_FROM;
8489eb67
RS
3079 }
3080 symdone:
3081 curlevel->prev = curlevel->last;
3082 break;
3083
240c43e8 3084 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3085 case Scomment:
195d1361 3086 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3087 startincomment:
3088 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3089 that we find a 2-char comment ender even if we start in the
e6365855 3090 middle of it. We don't want to do that if we're just at the
d355bd8a 3091 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3092 found = forw_comment (from, from_byte, end,
3093 state.incomment, state.comstyle,
e6365855
SM
3094 (from == BEGV || from < state.comstr_start + 3)
3095 ? 0 : prev_from_syntax,
95ff8dfc
RS
3096 &out_charpos, &out_bytepos, &state.incomment);
3097 from = out_charpos; from_byte = out_bytepos;
3098 /* Beware! prev_from and friends are invalid now.
3099 Luckily, the `done' doesn't use them and the INC_FROM
3100 sets them to a sane value without looking at them. */
3101 if (!found) goto done;
d355bd8a 3102 INC_FROM;
8489eb67 3103 state.incomment = 0;
e5d4f4dc 3104 state.comstyle = 0; /* reset the comment style */
195d1361 3105 if (boundary_stop) goto done;
8489eb67
RS
3106 break;
3107
3108 case Sopen:
3109 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3110 depth++;
3111 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3112 curlevel->last = prev_from;
8489eb67 3113 if (++curlevel == endlevel)
02010917 3114 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3115 curlevel->prev = -1;
3116 curlevel->last = -1;
30844415 3117 if (targetdepth == depth) goto done;
8489eb67
RS
3118 break;
3119
3120 case Sclose:
3121 depth--;
3122 if (depth < mindepth)
3123 mindepth = depth;
3124 if (curlevel != levelstart)
3125 curlevel--;
3126 curlevel->prev = curlevel->last;
30844415 3127 if (targetdepth == depth) goto done;
8489eb67
RS
3128 break;
3129
3130 case Sstring:
195d1361
RS
3131 case Sstring_fence:
3132 state.comstr_start = from - 1;
8489eb67 3133 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3134 curlevel->last = prev_from;
7d0393cf 3135 state.instring = (code == Sstring
b7dbcc19 3136 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3137 : ST_STRING_STYLE);
3138 if (boundary_stop) goto done;
8489eb67 3139 startinstring:
195d1361 3140 {
644ea4df 3141 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3142
644ea4df
RS
3143 while (1)
3144 {
3145 int c;
195d1361 3146
644ea4df 3147 if (from >= end) goto done;
b7dbcc19 3148 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
7c7ca3d2
RS
3149 /* Some compilers can't handle this inside the switch. */
3150 temp = SYNTAX (c);
ccf89641
KH
3151
3152 /* Check TEMP here so that if the char has
3153 a syntax-table property which says it is NOT
3154 a string character, it does not end the string. */
3155 if (nofence && c == state.instring && temp == Sstring)
3156 break;
3157
7c7ca3d2 3158 switch (temp)
644ea4df
RS
3159 {
3160 case Sstring_fence:
3161 if (!nofence) goto string_end;
3162 break;
3163 case Scharquote:
3164 case Sescape:
3165 INC_FROM;
3166 startquotedinstring:
3167 if (from >= end) goto endquoted;
195d1361 3168 }
644ea4df
RS
3169 INC_FROM;
3170 }
195d1361
RS
3171 }
3172 string_end:
8489eb67
RS
3173 state.instring = -1;
3174 curlevel->prev = curlevel->last;
93da5fff 3175 INC_FROM;
195d1361 3176 if (boundary_stop) goto done;
8489eb67
RS
3177 break;
3178
3179 case Smath:
240c43e8
SM
3180 /* FIXME: We should do something with it. */
3181 break;
3182 default:
3183 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3184 break;
3185 }
3186 }
3187 goto done;
3188
3189 stop: /* Here if stopping before start of sexp. */
93da5fff 3190 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3191 goto done; /* but return the position before it. */
3192
3193 endquoted:
3194 state.quoted = 1;
3195 done:
3196 state.depth = depth;
3197 state.mindepth = mindepth;
3198 state.thislevelstart = curlevel->prev;
3199 state.prevlevelstart
3200 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3201 state.location = from;
1a102a58
RS
3202 state.levelstarts = Qnil;
3203 while (--curlevel >= levelstart)
3204 state.levelstarts = Fcons (make_number (curlevel->last),
3205 state.levelstarts);
8489eb67
RS
3206 immediate_quit = 0;
3207
e5d4f4dc 3208 *stateptr = state;
8489eb67
RS
3209}
3210
c81a3712 3211DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3212 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3213Parsing stops at TO or when certain criteria are met;
3214 point is set to where parsing stops.
3215If fifth arg OLDSTATE is omitted or nil,
3216 parsing assumes that FROM is the beginning of a function.
0e6228bc 3217Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3218 0. depth in parens.
3219 1. character address of start of innermost containing list; nil if none.
3220 2. character address of start of last complete sexp terminated.
3221 3. non-nil if inside a string.
3222 (it is the character that will terminate the string,
3223 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3224 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3225 else an integer (the current comment nesting).
3226 5. t if following a quote character.
3227 6. the minimum paren-depth encountered during this scan.
3228 7. t if in a comment of style b; symbol `syntax-table' if the comment
3229 should be terminated by a generic comment delimiter.
3230 8. character address of start of comment or string; nil if not in one.
3231 9. Intermediate data for continuation of parsing (subject to change).
3232If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3233in parentheses becomes equal to TARGETDEPTH.
3234Fourth arg STOPBEFORE non-nil means stop when come to
3235 any character that starts a sexp.
0e6228bc 3236Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3237 It is used to initialize the state of the parse. Elements number 1, 2, 6
43726c05 3238 and 8 are ignored.
fdb82f93
PJ
3239Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3240 If it is symbol `syntax-table', stop after the start of a comment or a
3241 string, or after end of a comment or a string. */)
5842a27b 3242 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
8489eb67
RS
3243{
3244 struct lisp_parse_state state;
3245 int target;
3246
265a9e55 3247 if (!NILP (targetdepth))
8489eb67 3248 {
b7826503 3249 CHECK_NUMBER (targetdepth);
8489eb67
RS
3250 target = XINT (targetdepth);
3251 }
3252 else
3253 target = -100000; /* We won't reach this depth */
3254
3255 validate_region (&from, &to);
6a140a74
RS
3256 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3257 XINT (to),
c81a3712 3258 target, !NILP (stopbefore), oldstate,
7d0393cf 3259 (NILP (commentstop)
195d1361 3260 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3261
3262 SET_PT (state.location);
7d0393cf 3263
8489eb67
RS
3264 return Fcons (make_number (state.depth),
3265 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3266 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3267 Fcons (state.instring >= 0
3268 ? (state.instring == ST_STRING_STYLE
195d1361 3269 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3270 Fcons (state.incomment < 0 ? Qt :
3271 (state.incomment == 0 ? Qnil :
3272 make_number (state.incomment)),
8489eb67 3273 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3274 Fcons (make_number (state.mindepth),
7d0393cf 3275 Fcons ((state.comstyle
bff37d2a
RS
3276 ? (state.comstyle == ST_COMMENT_STYLE
3277 ? Qsyntax_table : Qt) :
3278 Qnil),
0beaf54f
DL
3279 Fcons (((state.incomment
3280 || (state.instring >= 0))
bff37d2a
RS
3281 ? make_number (state.comstr_start)
3282 : Qnil),
1a102a58 3283 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3284}
3285\f
dfcf069d 3286void
971de7fb 3287init_syntax_once (void)
8489eb67 3288{
78f9a1f7 3289 register int i, c;
8ea151b2 3290 Lisp_Object temp;
8489eb67 3291
5ebaddf5 3292 /* This has to be done here, before we call Fmake_char_table. */
d67b4f80 3293 Qsyntax_table = intern_c_string ("syntax-table");
5ebaddf5
RS
3294 staticpro (&Qsyntax_table);
3295
d67b4f80 3296 /* Intern_C_String this now in case it isn't already done.
5ebaddf5
RS
3297 Setting this variable twice is harmless.
3298 But don't staticpro it here--that is done in alloc.c. */
d67b4f80 3299 Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
5ebaddf5 3300
93da5fff 3301 /* Create objects which can be shared among syntax tables. */
42ebfa31 3302 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3303 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3304 XVECTOR (Vsyntax_code_object)->contents[i]
3305 = Fcons (make_number (i), Qnil);
3306
5ebaddf5
RS
3307 /* Now we are ready to set up this property, so we can
3308 create syntax tables. */
3309 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3310
93da5fff 3311 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3312
5ebaddf5 3313 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3314
aa7b08b4
RS
3315 /* Control characters should not be whitespace. */
3316 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3317 for (i = 0; i <= ' ' - 1; i++)
3318 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3319 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3320
3321 /* Except that a few really are whitespace. */
3322 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3323 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3324 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3325 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3326 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3327 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3328
93da5fff 3329 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3330 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3331 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3332 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3333 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3334 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3335 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3336
3337 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3338 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3339
3340 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3341 Fcons (make_number (Sopen), make_number (')')));
3342 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3343 Fcons (make_number (Sclose), make_number ('(')));
3344 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3345 Fcons (make_number (Sopen), make_number (']')));
3346 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3347 Fcons (make_number (Sclose), make_number ('[')));
3348 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3349 Fcons (make_number (Sopen), make_number ('}')));
3350 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3351 Fcons (make_number (Sclose), make_number ('{')));
3352 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3353 Fcons (make_number ((int) Sstring), Qnil));
3354 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3355 Fcons (make_number ((int) Sescape), Qnil));
3356
93da5fff 3357 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3358 for (i = 0; i < 10; i++)
78f9a1f7
KH
3359 {
3360 c = "_-+*/&|<>="[i];
3361 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3362 }
8489eb67 3363
93da5fff 3364 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3365 for (i = 0; i < 12; i++)
78f9a1f7
KH
3366 {
3367 c = ".,;:?!#@~^'`"[i];
3368 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3369 }
bd25db08
KH
3370
3371 /* All multibyte characters have syntax `word' by default. */
3372 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
5c7b02ab 3373 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3374}
3375
dfcf069d 3376void
971de7fb 3377syms_of_syntax (void)
8489eb67 3378{
d67b4f80 3379 Qsyntax_table_p = intern_c_string ("syntax-table-p");
8489eb67
RS
3380 staticpro (&Qsyntax_table_p);
3381
93da5fff
KH
3382 staticpro (&Vsyntax_code_object);
3383
a2994c46
KS
3384 staticpro (&gl_state.object);
3385 staticpro (&gl_state.global_code);
3386 staticpro (&gl_state.current_syntax_table);
3387 staticpro (&gl_state.old_prop);
3388
3389 /* Defined in regex.c */
3390 staticpro (&re_match_object);
3391
d67b4f80 3392 Qscan_error = intern_c_string ("scan-error");
7bf5e9e4
RS
3393 staticpro (&Qscan_error);
3394 Fput (Qscan_error, Qerror_conditions,
d67b4f80 3395 pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
7bf5e9e4 3396 Fput (Qscan_error, Qerror_message,
d67b4f80 3397 make_pure_c_string ("Scan error"));
7bf5e9e4 3398
8489eb67 3399 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3400 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3401
195d1361 3402 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3403 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3404Otherwise, that text property is simply ignored.
3405See the info node `(elisp)Syntax Properties' for a description of the
3406`syntax-table' property. */);
195d1361 3407
8489eb67
RS
3408 words_include_escapes = 0;
3409 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3410 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3411
bd25db08 3412 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3413 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3414 multibyte_syntax_as_symbol = 0;
3415
f4ed767f
GM
3416 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3417 &open_paren_in_column_0_is_defun_start,
b222e415 3418 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3419 open_paren_in_column_0_is_defun_start = 1;
3420
8f924df7
KH
3421
3422 DEFVAR_LISP ("find-word-boundary-function-table",
3423 &Vfind_word_boundary_function_table,
869bb237 3424 doc: /*
8f924df7 3425Char table of functions to search for the word boundary.
869bb237
KH
3426Each function is called with two arguments; POS and LIMIT.
3427POS and LIMIT are character positions in the current buffer.
3428
3429If POS is less than LIMIT, POS is at the first character of a word,
3430and the return value of a function is a position after the last
3431character of that word.
3432
3433If POS is not less than LIMIT, POS is at the last character of a word,
3434and the return value of a function is a position at the first
3435character of that word.
3436
3437In both cases, LIMIT bounds the search. */);
8f924df7 3438 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3439
8489eb67
RS
3440 defsubr (&Ssyntax_table_p);
3441 defsubr (&Ssyntax_table);
3442 defsubr (&Sstandard_syntax_table);
3443 defsubr (&Scopy_syntax_table);
3444 defsubr (&Sset_syntax_table);
3445 defsubr (&Schar_syntax);
beefa22e 3446 defsubr (&Smatching_paren);
c65adb44 3447 defsubr (&Sstring_to_syntax);
8489eb67 3448 defsubr (&Smodify_syntax_entry);
62abe9cb 3449 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3450
3451 defsubr (&Sforward_word);
3452
195d1361
RS
3453 defsubr (&Sskip_chars_forward);
3454 defsubr (&Sskip_chars_backward);
3455 defsubr (&Sskip_syntax_forward);
3456 defsubr (&Sskip_syntax_backward);
3457
b3cfe0c8 3458 defsubr (&Sforward_comment);
8489eb67
RS
3459 defsubr (&Sscan_lists);
3460 defsubr (&Sscan_sexps);
3461 defsubr (&Sbackward_prefix_chars);
3462 defsubr (&Sparse_partial_sexp);
3463}
839966f3
KH
3464
3465/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3466 (do not change this comment) */