merging Emacs.app (NeXTstep port)
[bpt/emacs.git] / src / syntax.c
CommitLineData
edfda783 1#include <stdio.h>
8489eb67 2/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
429ab54e 3 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
8cabe764 4 2002, 2003, 2004, 2005, 2006, 2007, 2008
429ab54e 5 Free Software Foundation, Inc.
8489eb67
RS
6
7This file is part of GNU Emacs.
8
9ec0b715 9GNU Emacs is free software: you can redistribute it and/or modify
8489eb67 10it under the terms of the GNU General Public License as published by
9ec0b715
GM
11the Free Software Foundation, either version 3 of the License, or
12(at your option) any later version.
8489eb67
RS
13
14GNU Emacs is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
9ec0b715 20along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
8489eb67
RS
21
22
18160b98 23#include <config.h>
8489eb67
RS
24#include <ctype.h>
25#include "lisp.h"
26#include "commands.h"
27#include "buffer.h"
5c7b02ab 28#include "character.h"
e35f6ff7 29#include "keymap.h"
a1bc88d4 30#include "regex.h"
195d1361
RS
31
32/* Make syntax table lookup grant data in gl_state. */
33#define SYNTAX_ENTRY_VIA_PROPERTY
34
8489eb67 35#include "syntax.h"
195d1361
RS
36#include "intervals.h"
37
38/* We use these constants in place for comment-style and
39 string-ender-char to distinguish comments/strings started by
40 comment_fence and string_fence codes. */
41
42#define ST_COMMENT_STYLE (256 + 1)
43#define ST_STRING_STYLE (256 + 2)
93da5fff 44#include "category.h"
8489eb67 45
7bf5e9e4 46Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
47
48int words_include_escapes;
195d1361 49int parse_sexp_lookup_properties;
8489eb67 50
bd25db08
KH
51/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
52int multibyte_syntax_as_symbol;
53
8ea151b2
RS
54/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
55 if not compiled with GCC. No need to mark it, since it is used
56 only very temporarily. */
57Lisp_Object syntax_temp;
58
f4ed767f
GM
59/* Non-zero means an open parenthesis in column 0 is always considered
60 to be the start of a defun. Zero means an open parenthesis in
61 column 0 has no special meaning. */
62
63int open_paren_in_column_0_is_defun_start;
64
e5d4f4dc
RS
65/* This is the internal form of the parse state used in parse-partial-sexp. */
66
67struct lisp_parse_state
68 {
195d1361
RS
69 int depth; /* Depth at end of parsing. */
70 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 71 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 72 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
73 int quoted; /* Nonzero if just after an escape char at end of parsing */
74 int thislevelstart; /* Char number of most recent start-of-expression at current level */
75 int prevlevelstart; /* Char number of start of containing expression */
195d1361 76 int location; /* Char number at which parsing stopped. */
e5d4f4dc 77 int mindepth; /* Minimum depth seen while scanning. */
195d1361 78 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
79 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
80 of levels (starting from outermost). */
e5d4f4dc
RS
81 };
82\f
37bef230
RS
83/* These variables are a cache for finding the start of a defun.
84 find_start_pos is the place for which the defun start was found.
85 find_start_value is the defun start position found for it.
6a140a74 86 find_start_value_byte is the corresponding byte position.
37bef230
RS
87 find_start_buffer is the buffer it was found in.
88 find_start_begv is the BEGV value when it was found.
89 find_start_modiff is the value of MODIFF when it was found. */
90
91static int find_start_pos;
92static int find_start_value;
6a140a74 93static int find_start_value_byte;
37bef230
RS
94static struct buffer *find_start_buffer;
95static int find_start_begv;
96static int find_start_modiff;
6a140a74
RS
97
98
99static int find_defun_start P_ ((int, int));
932e6895
SM
100static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int,
101 EMACS_INT *, EMACS_INT *));
6a140a74 102static int char_quoted P_ ((int, int));
327719ee 103static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
b7dbcc19 104static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
932e6895 105static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
6a140a74
RS
106static void scan_sexps_forward P_ ((struct lisp_parse_state *,
107 int, int, int, int,
108 int, Lisp_Object, int));
a1bc88d4 109static int in_classes P_ ((int, Lisp_Object));
195d1361
RS
110\f
111
112struct gl_state_s gl_state; /* Global state of syntax parser. */
113
114INTERVAL interval_of ();
115#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
116 to scan to property-change. */
117
6a140a74
RS
118/* Update gl_state to an appropriate interval which contains CHARPOS. The
119 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 120 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 121 valid at start, the rest is filled basing on OBJECT.
195d1361 122
6a140a74 123 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
124 direction than the intervals - or in an interval. We update the
125 current syntax-table basing on the property of this interval, and
6a140a74 126 update the interval to start further than CHARPOS - or be
195d1361 127 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 128 charpos to call this subroutine again - or be before/after the
195d1361
RS
129 start/end of OBJECT. */
130
131void
6a140a74
RS
132update_syntax_table (charpos, count, init, object)
133 int charpos, count, init;
195d1361
RS
134 Lisp_Object object;
135{
136 Lisp_Object tmp_table;
4ffe723b 137 int cnt = 0, invalidate = 1;
5a774522 138 INTERVAL i;
195d1361
RS
139
140 if (init)
141 {
bb0de084 142 gl_state.old_prop = Qnil;
195d1361
RS
143 gl_state.start = gl_state.b_property;
144 gl_state.stop = gl_state.e_property;
bb0de084
SM
145 i = interval_of (charpos, object);
146 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
147 invalidate = 0;
148 if (NULL_INTERVAL_P (i))
149 return;
f902a008 150 /* interval_of updates only ->position of the return value, so
d80f4cc7 151 update the parents manually to speed up update_interval. */
bb0de084 152 while (!NULL_PARENT (i))
d80f4cc7
RS
153 {
154 if (AM_RIGHT_CHILD (i))
439d5cb4 155 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 156 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
157 - TOTAL_LENGTH (INTERVAL_PARENT (i))
158 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 159 else
439d5cb4 160 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 161 + TOTAL_LENGTH (i);
439d5cb4 162 i = INTERVAL_PARENT (i);
d80f4cc7
RS
163 }
164 i = gl_state.forward_i;
bb0de084 165 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 166 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
167 goto update;
168 }
5a774522 169 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 170
423e705d 171 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 172 or further off. */
195d1361
RS
173 if (NULL_INTERVAL_P (i))
174 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 175 else if (charpos < i->position) /* Move left. */
195d1361
RS
176 {
177 if (count > 0)
f902a008 178 error ("Error in syntax_table logic for intervals <-");
195d1361 179 /* Update the interval. */
6a140a74 180 i = update_interval (i, charpos);
bb0de084 181 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
182 {
183 invalidate = 0;
195d1361 184 gl_state.forward_i = i;
ee0cdb48 185 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 186 }
423e705d 187 }
6a140a74 188 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
189 {
190 if (count < 0)
f902a008 191 error ("Error in syntax_table logic for intervals ->");
195d1361 192 /* Update the interval. */
6a140a74 193 i = update_interval (i, charpos);
bb0de084 194 if (i->position != gl_state.e_property)
195d1361
RS
195 {
196 invalidate = 0;
195d1361 197 gl_state.backward_i = i;
bb0de084 198 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
199 }
200 }
195d1361
RS
201
202 update:
203 tmp_table = textget (i->plist, Qsyntax_table);
204
205 if (invalidate)
206 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 207
423e705d
SM
208 if (invalidate) /* Did not get to adjacent interval. */
209 { /* with the same table => */
210 /* invalidate the old range. */
195d1361
RS
211 if (count > 0)
212 {
213 gl_state.backward_i = i;
bb0de084
SM
214 gl_state.b_property = i->position - gl_state.offset;
215 }
216 else
195d1361 217 {
bb0de084 218 gl_state.forward_i = i;
ee0cdb48 219 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
220 }
221 }
37bef230 222
bb0de084 223 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 224 {
bb0de084
SM
225 gl_state.current_syntax_table = tmp_table;
226 gl_state.old_prop = tmp_table;
227 if (EQ (Fsyntax_table_p (tmp_table), Qt))
228 {
229 gl_state.use_global = 0;
7d0393cf 230 }
bb0de084
SM
231 else if (CONSP (tmp_table))
232 {
233 gl_state.use_global = 1;
234 gl_state.global_code = tmp_table;
235 }
7d0393cf 236 else
bb0de084
SM
237 {
238 gl_state.use_global = 0;
239 gl_state.current_syntax_table = current_buffer->syntax_table;
240 }
195d1361
RS
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
bb0de084
SM
248 {
249 gl_state.e_property = i->position - gl_state.offset;
250 gl_state.forward_i = i;
251 }
252 else
253 {
5a774522
SM
254 gl_state.b_property
255 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
256 gl_state.backward_i = i;
257 }
258 return;
195d1361 259 }
7d0393cf 260 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
261 {
262 if (count > 0)
bb0de084 263 {
5a774522
SM
264 gl_state.e_property
265 = i->position + LENGTH (i) - gl_state.offset
266 /* e_property at EOB is not set to ZV but to ZV+1, so that
267 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
268 having to check eob between the two. */
269 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
270 gl_state.forward_i = i;
271 }
272 else
273 {
274 gl_state.b_property = i->position - gl_state.offset;
275 gl_state.backward_i = i;
276 }
277 return;
195d1361
RS
278 }
279 cnt++;
280 i = count > 0 ? next_interval (i) : previous_interval (i);
281 }
bb0de084
SM
282 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
283 if (count > 0)
284 gl_state.e_property = gl_state.stop;
285 else
286 gl_state.b_property = gl_state.start;
195d1361
RS
287}
288\f
6a140a74
RS
289/* Returns TRUE if char at CHARPOS is quoted.
290 Global syntax-table data should be set up already to be good at CHARPOS
291 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
292
293static int
6a140a74
RS
294char_quoted (charpos, bytepos)
295 register int charpos, bytepos;
195d1361
RS
296{
297 register enum syntaxcode code;
298 register int beg = BEGV;
299 register int quoted = 0;
6a140a74
RS
300 int orig = charpos;
301
edfda783
AR
302#ifdef HAVE_NS
303 /* For some reason keeps getting called w/both 1, then segfaulting
304 due to the definitions of DEC_BOTH and DEC_POS in character.h,
305 which lead to decrementing below initial address and then examining
306 character there. Need to investigate further.. */
307 if (charpos < 2 || bytepos < 2)
308 {
309 //fprintf(stderr,"Returning because charpos = %d, bytepos = %d\n",charpos, bytepos);
310 return 0;
311 }
312#endif
313
6a140a74 314 DEC_BOTH (charpos, bytepos);
195d1361 315
709a47ce 316 while (charpos >= beg)
195d1361 317 {
ab229fdd
AS
318 int c;
319
6a140a74 320 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
327719ee 321 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
ab229fdd 322 code = SYNTAX (c);
6a140a74
RS
323 if (! (code == Scharquote || code == Sescape))
324 break;
325
326 DEC_BOTH (charpos, bytepos);
327 quoted = !quoted;
195d1361 328 }
6a140a74
RS
329
330 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
331 return quoted;
332}
6a140a74
RS
333
334/* Return the bytepos one character after BYTEPOS.
335 We assume that BYTEPOS is not at the end of the buffer. */
336
337INLINE int
338inc_bytepos (bytepos)
339 int bytepos;
340{
ef316cf0
RS
341 if (NILP (current_buffer->enable_multibyte_characters))
342 return bytepos + 1;
343
6a140a74
RS
344 INC_POS (bytepos);
345 return bytepos;
346}
347
348/* Return the bytepos one character before BYTEPOS.
349 We assume that BYTEPOS is not at the start of the buffer. */
350
351INLINE int
352dec_bytepos (bytepos)
353 int bytepos;
354{
ef316cf0
RS
355 if (NILP (current_buffer->enable_multibyte_characters))
356 return bytepos - 1;
357
6a140a74
RS
358 DEC_POS (bytepos);
359 return bytepos;
360}
195d1361 361\f
7cc80f0a
RS
362/* Return a defun-start position before before POS and not too far before.
363 It should be the last one before POS, or nearly the last.
364
365 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 366 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
367
368 We record the information about where the scan started
369 and what its result was, so that another call in the same area
370 can return the same value very quickly.
195d1361
RS
371
372 There is no promise at which position the global syntax data is
373 valid on return from the subroutine, so the caller should explicitly
374 update the global data. */
37bef230
RS
375
376static int
6a140a74
RS
377find_defun_start (pos, pos_byte)
378 int pos, pos_byte;
37bef230 379{
6a140a74 380 int opoint = PT, opoint_byte = PT_BYTE;
37bef230 381
1fd1cc2f
RS
382 if (!open_paren_in_column_0_is_defun_start)
383 {
384 find_start_value_byte = BEGV_BYTE;
385 return BEGV;
386 }
387
37bef230
RS
388 /* Use previous finding, if it's valid and applies to this inquiry. */
389 if (current_buffer == find_start_buffer
390 /* Reuse the defun-start even if POS is a little farther on.
391 POS might be in the next defun, but that's ok.
392 Our value may not be the best possible, but will still be usable. */
393 && pos <= find_start_pos + 1000
394 && pos >= find_start_value
395 && BEGV == find_start_begv
396 && MODIFF == find_start_modiff)
397 return find_start_value;
398
399 /* Back up to start of line. */
6a140a74 400 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 401
195d1361
RS
402 /* We optimize syntax-table lookup for rare updates. Thus we accept
403 only those `^\s(' which are good in global _and_ text-property
404 syntax-tables. */
405 gl_state.current_syntax_table = current_buffer->syntax_table;
406 gl_state.use_global = 0;
1fd1cc2f 407 while (PT > BEGV)
37bef230 408 {
ab229fdd
AS
409 int c;
410
1fd1cc2f
RS
411 /* Open-paren at start of line means we may have found our
412 defun-start. */
327719ee 413 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 414 if (SYNTAX (c) == Sopen)
195d1361 415 {
1fd1cc2f 416 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
327719ee 417 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
ab229fdd 418 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
419 break;
420 /* Now fallback to the default value. */
421 gl_state.current_syntax_table = current_buffer->syntax_table;
422 gl_state.use_global = 0;
195d1361 423 }
1fd1cc2f
RS
424 /* Move to beg of previous line. */
425 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
426 }
427
428 /* Record what we found, for the next try. */
6a140a74
RS
429 find_start_value = PT;
430 find_start_value_byte = PT_BYTE;
37bef230
RS
431 find_start_buffer = current_buffer;
432 find_start_modiff = MODIFF;
433 find_start_begv = BEGV;
434 find_start_pos = pos;
435
6a140a74
RS
436 TEMP_SET_PT_BOTH (opoint, opoint_byte);
437
37bef230
RS
438 return find_start_value;
439}
440\f
f902a008
RS
441/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
442
443static int
444prev_char_comend_first (pos, pos_byte)
445 int pos, pos_byte;
446{
447 int c, val;
448
449 DEC_BOTH (pos, pos_byte);
450 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
451 c = FETCH_CHAR (pos_byte);
452 val = SYNTAX_COMEND_FIRST (c);
453 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
454 return val;
455}
456
457/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
458
3f679f55
SM
459/* static int
460 * prev_char_comstart_first (pos, pos_byte)
461 * int pos, pos_byte;
462 * {
463 * int c, val;
7d0393cf 464 *
3f679f55
SM
465 * DEC_BOTH (pos, pos_byte);
466 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
467 * c = FETCH_CHAR (pos_byte);
468 * val = SYNTAX_COMSTART_FIRST (c);
469 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
470 * return val;
471 * } */
f902a008 472
6a140a74
RS
473/* Checks whether charpos FROM is at the end of a comment.
474 FROM_BYTE is the bytepos corresponding to FROM.
475 Do not move back before STOP.
476
477 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
478 return -1 otherwise.
479
480 If successful, store the charpos of the comment's beginning
481 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
482
483 Global syntax data remains valid for backward search starting at
484 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
485
486static int
95ff8dfc 487back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
932e6895 488 EMACS_INT from, from_byte, stop;
95ff8dfc 489 int comnested, comstyle;
932e6895 490 EMACS_INT *charpos_ptr, *bytepos_ptr;
195d1361
RS
491{
492 /* Look back, counting the parity of string-quotes,
493 and recording the comment-starters seen.
494 When we reach a safe place, assume that's not in a string;
495 then step the main scan to the earliest comment-starter seen
496 an even number of string quotes away from the safe place.
497
498 OFROM[I] is position of the earliest comment-starter seen
499 which is I+2X quotes from the comment-end.
500 PARITY is current parity of quotes from the comment end. */
3ee5041c 501 int string_style = -1; /* Presumed outside of any string. */
195d1361 502 int string_lossage = 0;
f7c436c1 503 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 504 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
505 comment-starter we might see later could be a false positive (hidden
506 inside another comment).
507 Test case: { a (* b } c (* d *) */
508 int comment_lossage = 0;
195d1361 509 int comment_end = from;
6a140a74 510 int comment_end_byte = from_byte;
195d1361 511 int comstart_pos = 0;
6a140a74 512 int comstart_byte;
eb35b628
RS
513 /* Place where the containing defun starts,
514 or 0 if we didn't come across it yet. */
515 int defun_start = 0;
516 int defun_start_byte = 0;
195d1361 517 register enum syntaxcode code;
95ff8dfc 518 int nesting = 1; /* current comment nesting */
ea315ed6 519 int c;
3f679f55
SM
520 int syntax = 0;
521
522 /* FIXME: A }} comment-ender style leads to incorrect behavior
523 in the case of {{ c }}} because we ignore the last two chars which are
524 assumed to be comment-enders although they aren't. */
195d1361
RS
525
526 /* At beginning of range to scan, we're outside of strings;
527 that determines quote parity to the comment-end. */
528 while (from != stop)
529 {
3f679f55
SM
530 int temp_byte, prev_syntax;
531 int com2start, com2end;
6a140a74 532
195d1361 533 /* Move back and examine a character. */
6a140a74 534 DEC_BOTH (from, from_byte);
195d1361
RS
535 UPDATE_SYNTAX_TABLE_BACKWARD (from);
536
3f679f55 537 prev_syntax = syntax;
b7dbcc19 538 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3f679f55 539 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
540 code = SYNTAX (c);
541
3f679f55
SM
542 /* Check for 2-char comment markers. */
543 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
544 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
545 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
546 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
547 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
548 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
549 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
550
551 /* Nasty cases with overlapping 2-char comment markers:
552 - snmp-mode: -- c -- foo -- c --
553 --- c --
554 ------ c --
555 - c-mode: *||*
556 |* *|* *|
557 |*| |* |*|
558 /// */
559
560 /* If a 2-char comment sequence partly overlaps with another,
561 we don't try to be clever. */
562 if (from > stop && (com2end || com2start))
195d1361 563 {
3f679f55
SM
564 int next = from, next_byte = from_byte, next_c, next_syntax;
565 DEC_BOTH (next, next_byte);
566 UPDATE_SYNTAX_TABLE_BACKWARD (next);
b7dbcc19 567 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
3f679f55
SM
568 next_syntax = SYNTAX_WITH_FLAGS (next_c);
569 if (((com2start || comnested)
570 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
571 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
572 || ((com2end || comnested)
573 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
574 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
575 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
576 goto lossage;
577 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 578 }
3f679f55
SM
579
580 if (com2start && comstart_pos == 0)
581 /* We're looking at a comment starter. But it might be a comment
582 ender as well (see snmp-mode). The first time we see one, we
583 need to consider it as a comment starter,
584 and the subsequent times as a comment ender. */
585 com2end = 0;
586
587 /* Turn a 2-char comment sequences into the appropriate syntax. */
588 if (com2end)
589 code = Sendcomment;
590 else if (com2start)
591 code = Scomment;
592 /* Ignore comment starters of a different style. */
593 else if (code == Scomment
594 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
595 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 596 continue;
195d1361 597
9828a477
KH
598 /* Ignore escaped characters, except comment-enders. */
599 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
600 continue;
601
3ee5041c 602 switch (code)
195d1361 603 {
3ee5041c
SM
604 case Sstring_fence:
605 case Scomment_fence:
606 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
607 case Sstring:
608 /* Track parity of quotes. */
609 if (string_style == -1)
610 /* Entering a string. */
611 string_style = c;
612 else if (string_style == c)
613 /* Leaving the string. */
614 string_style = -1;
615 else
616 /* If we have two kinds of string delimiters.
617 There's no way to grok this scanning backwards. */
195d1361 618 string_lossage = 1;
3ee5041c 619 break;
7d0393cf 620
3ee5041c
SM
621 case Scomment:
622 /* We've already checked that it is the relevant comstyle. */
f7c436c1 623 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
624 /* There are odd string quotes involved, so let's be careful.
625 Test case in Pascal: " { " a { " } */
626 goto lossage;
627
f7c436c1
SM
628 if (!comnested)
629 {
630 /* Record best comment-starter so far. */
631 comstart_pos = from;
632 comstart_byte = from_byte;
633 }
634 else if (--nesting <= 0)
95ff8dfc
RS
635 /* nested comments have to be balanced, so we don't need to
636 keep looking for earlier ones. We use here the same (slightly
637 incorrect) reasoning as below: since it is followed by uniform
638 paired string quotes, this comment-start has to be outside of
639 strings, else the comment-end itself would be inside a string. */
640 goto done;
3ee5041c
SM
641 break;
642
02010917 643 case Sendcomment:
3f679f55 644 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 645 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 646 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
647 /* This is the same style of comment ender as ours. */
648 {
649 if (comnested)
650 nesting++;
651 else
652 /* Anything before that can't count because it would match
653 this comment-ender rather than ours. */
654 from = stop; /* Break out of the loop. */
655 }
f7c436c1
SM
656 else if (comstart_pos != 0 || c != '\n')
657 /* We're mixing comment styles here, so we'd better be careful.
658 The (comstart_pos != 0 || c != '\n') check is not quite correct
659 (we should just always set comment_lossage), but removing it
660 would imply that any multiline comment in C would go through
661 lossage, which seems overkill.
662 The failure should only happen in the rare cases such as
663 { (* } *) */
664 comment_lossage = 1;
02010917 665 break;
195d1361 666
02010917
SM
667 case Sopen:
668 /* Assume a defun-start point is outside of strings. */
669 if (open_paren_in_column_0_is_defun_start
670 && (from == stop
671 || (temp_byte = dec_bytepos (from_byte),
672 FETCH_CHAR (temp_byte) == '\n')))
673 {
674 defun_start = from;
675 defun_start_byte = from_byte;
676 from = stop; /* Break out of the loop. */
677 }
eb35b628 678 break;
02010917
SM
679
680 default:
f7c436c1 681 break;
eb35b628 682 }
195d1361
RS
683 }
684
685 if (comstart_pos == 0)
686 {
687 from = comment_end;
6a140a74 688 from_byte = comment_end_byte;
195d1361
RS
689 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
690 }
f7c436c1
SM
691 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
692 or `done'), then we've found the beginning of the non-nested comment. */
693 else if (1) /* !comnested */
195d1361
RS
694 {
695 from = comstart_pos;
6a140a74 696 from_byte = comstart_byte;
789f3320 697 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
698 }
699 else
700 {
3ee5041c
SM
701 struct lisp_parse_state state;
702 lossage:
195d1361
RS
703 /* We had two kinds of string delimiters mixed up
704 together. Decode this going forwards.
02010917 705 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
706 to the one in question; this records where we
707 last passed a comment starter. */
eb35b628
RS
708 /* If we did not already find the defun start, find it now. */
709 if (defun_start == 0)
710 {
711 defun_start = find_defun_start (comment_end, comment_end_byte);
712 defun_start_byte = find_start_value_byte;
713 }
02010917 714 do
195d1361 715 {
02010917
SM
716 scan_sexps_forward (&state,
717 defun_start, defun_start_byte,
718 comment_end, -10000, 0, Qnil, 0);
719 defun_start = comment_end;
720 if (state.incomment == (comnested ? 1 : -1)
721 && state.comstyle == comstyle)
722 from = state.comstr_start;
723 else
724 {
725 from = comment_end;
726 if (state.incomment)
727 /* If comment_end is inside some other comment, maybe ours
728 is nested, so we need to try again from within the
729 surrounding comment. Example: { a (* " *) */
730 {
731 /* FIXME: We should advance by one or two chars. */
732 defun_start = state.comstr_start + 2;
733 defun_start_byte = CHAR_TO_BYTE (defun_start);
734 }
735 }
736 } while (defun_start < comment_end);
737
6a140a74 738 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
739 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
740 }
7d0393cf 741
95ff8dfc 742 done:
6a140a74
RS
743 *charpos_ptr = from;
744 *bytepos_ptr = from_byte;
745
1aa963c8 746 return (from == comment_end) ? -1 : from;
195d1361
RS
747}
748\f
8489eb67 749DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
750 doc: /* Return t if OBJECT is a syntax table.
751Currently, any char-table counts as a syntax table. */)
752 (object)
2203e1e8 753 Lisp_Object object;
8489eb67 754{
2203e1e8 755 if (CHAR_TABLE_P (object)
e704cb4b 756 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
757 return Qt;
758 return Qnil;
759}
760
8ea151b2 761static void
8489eb67
RS
762check_syntax_table (obj)
763 Lisp_Object obj;
764{
47f5f6ae
KS
765 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
766 Qsyntax_table_p, obj);
7d0393cf 767}
8489eb67 768
8489eb67 769DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
770 doc: /* Return the current syntax table.
771This is the one specified by the current buffer. */)
772 ()
8489eb67
RS
773{
774 return current_buffer->syntax_table;
775}
776
777DEFUN ("standard-syntax-table", Fstandard_syntax_table,
778 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
779 doc: /* Return the standard syntax table.
780This is the one used for new buffers. */)
781 ()
8489eb67
RS
782{
783 return Vstandard_syntax_table;
784}
785
786DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
787 doc: /* Construct a new syntax table and return it.
788It is a copy of the TABLE, which defaults to the standard syntax table. */)
789 (table)
8489eb67
RS
790 Lisp_Object table;
791{
8ea151b2
RS
792 Lisp_Object copy;
793
265a9e55 794 if (!NILP (table))
8ea151b2
RS
795 check_syntax_table (table);
796 else
797 table = Vstandard_syntax_table;
798
799 copy = Fcopy_sequence (table);
0f867324
RS
800
801 /* Only the standard syntax table should have a default element.
802 Other syntax tables should inherit from parents instead. */
803 XCHAR_TABLE (copy)->defalt = Qnil;
804
805 /* Copied syntax tables should all have parents.
806 If we copied one with no parent, such as the standard syntax table,
807 use the standard syntax table as the copy's parent. */
808 if (NILP (XCHAR_TABLE (copy)->parent))
809 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 810 return copy;
8489eb67
RS
811}
812
813DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
814 doc: /* Select a new syntax table for the current buffer.
815One argument, a syntax table. */)
816 (table)
8489eb67
RS
817 Lisp_Object table;
818{
4e4dfa78 819 int idx;
8ea151b2 820 check_syntax_table (table);
8489eb67
RS
821 current_buffer->syntax_table = table;
822 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
823 idx = PER_BUFFER_VAR_IDX (syntax_table);
824 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
825 return table;
826}
827\f
828/* Convert a letter which signifies a syntax code
829 into the code it signifies.
195d1361 830 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
831
832unsigned char syntax_spec_code[0400] =
833 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
834 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
835 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
836 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 837 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
838 (char) Smath, 0377, 0377, (char) Squote,
839 (char) Sopen, (char) Sclose, 0377, 0377,
840 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
841 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
842 0377, 0377, 0377, 0377,
843 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 844 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
845 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
846 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
847 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
848 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
849 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
850 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 851 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
852 };
853
195d1361 854/* Indexed by syntax code, give the letter that describes it. */
8489eb67 855
195d1361 856char syntax_code_spec[16] =
8489eb67 857 {
195d1361
RS
858 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
859 '!', '|'
8489eb67 860 };
93da5fff
KH
861
862/* Indexed by syntax code, give the object (cons of syntax code and
863 nil) to be stored in syntax table. Since these objects can be
864 shared among syntax tables, we generate them in advance. By
865 sharing objects, the function `describe-syntax' can give a more
866 compact listing. */
867static Lisp_Object Vsyntax_code_object;
868
8489eb67
RS
869\f
870DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93
PJ
871 doc: /* Return the syntax code of CHARACTER, described by a character.
872For example, if CHARACTER is a word constituent,
873the character `w' is returned.
874The characters that correspond to various syntax codes
875are listed in the documentation of `modify-syntax-entry'. */)
876 (character)
2203e1e8 877 Lisp_Object character;
8489eb67 878{
8ea151b2 879 int char_int;
195d1361
RS
880 gl_state.current_syntax_table = current_buffer->syntax_table;
881
882 gl_state.use_global = 0;
b7826503 883 CHECK_NUMBER (character);
2203e1e8 884 char_int = XINT (character);
8ea151b2 885 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
886}
887
888DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
889 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
890 (character)
2203e1e8 891 Lisp_Object character;
beefa22e 892{
8ea151b2 893 int char_int, code;
195d1361
RS
894 gl_state.current_syntax_table = current_buffer->syntax_table;
895 gl_state.use_global = 0;
b7826503 896 CHECK_NUMBER (character);
2203e1e8 897 char_int = XINT (character);
8ea151b2 898 code = SYNTAX (char_int);
a8bd7cd8 899 if (code == Sopen || code == Sclose)
2e34157c 900 return SYNTAX_MATCH (char_int);
beefa22e 901 return Qnil;
8489eb67
RS
902}
903
c65adb44 904DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
905 doc: /* Convert a syntax specification STRING into syntax cell form.
906STRING should be a string as it is allowed as argument of
907`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 908\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93
PJ
909text property. */)
910 (string)
c65adb44 911 Lisp_Object string;
8489eb67 912{
2e567bd3 913 register const unsigned char *p;
8489eb67 914 register enum syntaxcode code;
247e20a8 915 int val;
8ea151b2 916 Lisp_Object match;
8489eb67 917
b7826503 918 CHECK_STRING (string);
8489eb67 919
d5db4077 920 p = SDATA (string);
8489eb67
RS
921 code = (enum syntaxcode) syntax_spec_code[*p++];
922 if (((int) code & 0377) == 0377)
b764a653 923 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 924
8ea151b2 925 if (code == Sinherit)
c65adb44 926 return Qnil;
8ea151b2
RS
927
928 if (*p)
d1be9f0f 929 {
93da5fff 930 int len;
7c19e8e1 931 int character = (STRING_CHAR_AND_LENGTH
d5db4077 932 (p, SBYTES (string) - 1, len));
93da5fff 933 XSETINT (match, character);
d1be9f0f
RS
934 if (XFASTINT (match) == ' ')
935 match = Qnil;
93da5fff 936 p += len;
d1be9f0f
RS
937 }
938 else
8ea151b2 939 match = Qnil;
8489eb67 940
8ea151b2 941 val = (int) code;
8489eb67
RS
942 while (*p)
943 switch (*p++)
944 {
945 case '1':
247e20a8 946 val |= 1 << 16;
8489eb67
RS
947 break;
948
949 case '2':
247e20a8 950 val |= 1 << 17;
8489eb67
RS
951 break;
952
953 case '3':
247e20a8 954 val |= 1 << 18;
8489eb67
RS
955 break;
956
957 case '4':
247e20a8 958 val |= 1 << 19;
8489eb67
RS
959 break;
960
961 case 'p':
247e20a8 962 val |= 1 << 20;
8489eb67 963 break;
e5d4f4dc
RS
964
965 case 'b':
247e20a8 966 val |= 1 << 21;
e5d4f4dc 967 break;
95ff8dfc
RS
968
969 case 'n':
970 val |= 1 << 22;
971 break;
8489eb67 972 }
7d0393cf 973
93da5fff 974 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 975 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
976 else
977 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
978 return Fcons (make_number (val), match);
979}
980
fdb82f93 981/* I really don't know why this is interactive
2c9e1900 982 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 983DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 984 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 985 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 986The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93 987 the current buffer's syntax table.
5c7b02ab
KH
988CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
989in the range MIN and MAX are changed.
fdb82f93
PJ
990The first character of NEWENTRY should be one of the following:
991 Space or - whitespace syntax. w word constituent.
992 _ symbol constituent. . punctuation.
993 ( open-parenthesis. ) close-parenthesis.
994 " string quote. \\ escape.
995 $ paired delimiter. ' expression quote or prefix operator.
996 < comment starter. > comment ender.
997 / character-quote. @ inherit from `standard-syntax-table'.
998 | generic string fence. ! generic comment fence.
999
1000Only single-character comment start and end sequences are represented thus.
1001Two-character sequences are represented as described below.
1002The second character of NEWENTRY is the matching parenthesis,
1003 used only if the first character is `(' or `)'.
1004Any additional characters are flags.
1005Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1006 1 means CHAR is the start of a two-char comment start sequence.
1007 2 means CHAR is the second character of such a sequence.
1008 3 means CHAR is the start of a two-char comment end sequence.
1009 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
1010
1011There can be up to two orthogonal comment sequences. This is to support
1012language modes such as C++. By default, all comment sequences are of style
1013a, but you can set the comment sequence style to b (on the second character
1014of a comment-start, or the first character of a comment-end sequence) using
1015this flag:
a50a10a0
PJ
1016 b means CHAR is part of comment sequence b.
1017 n means CHAR is part of a nestable comment sequence.
fdb82f93 1018
a50a10a0 1019 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1020 such characters are treated as whitespace when they occur
a50a10a0
PJ
1021 between expressions.
1022usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1023 (c, newentry, syntax_table)
c65adb44
SM
1024 Lisp_Object c, newentry, syntax_table;
1025{
5c7b02ab
KH
1026 if (CONSP (c))
1027 {
8f924df7
KH
1028 CHECK_CHARACTER_CAR (c);
1029 CHECK_CHARACTER_CDR (c);
5c7b02ab
KH
1030 }
1031 else
1032 CHECK_CHARACTER (c);
c65adb44
SM
1033
1034 if (NILP (syntax_table))
1035 syntax_table = current_buffer->syntax_table;
1036 else
1037 check_syntax_table (syntax_table);
8489eb67 1038
5c7b02ab
KH
1039 newentry = Fstring_to_syntax (newentry);
1040 if (CONSP (c))
1041 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1042 else
1043 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
e5b94d44
CY
1044
1045 /* We clear the regexp cache, since character classes can now have
1046 different values from those in the compiled regexps.*/
1047 clear_regexp_cache ();
1048
8489eb67
RS
1049 return Qnil;
1050}
1051\f
1052/* Dump syntax table to buffer in human-readable format */
1053
62abe9cb
SM
1054DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1055 Sinternal_describe_syntax_value, 1, 1, 0,
1056 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1057 (syntax)
1058 Lisp_Object syntax;
8489eb67
RS
1059{
1060 register enum syntaxcode code;
e6365855 1061 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1062 char str[2];
62abe9cb 1063 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1064
8ea151b2
RS
1065 if (NILP (value))
1066 {
62abe9cb
SM
1067 insert_string ("default");
1068 return syntax;
8ea151b2
RS
1069 }
1070
908b7fea
KH
1071 if (CHAR_TABLE_P (value))
1072 {
62abe9cb
SM
1073 insert_string ("deeper char-table ...");
1074 return syntax;
908b7fea
KH
1075 }
1076
8ea151b2
RS
1077 if (!CONSP (value))
1078 {
62abe9cb
SM
1079 insert_string ("invalid");
1080 return syntax;
8ea151b2
RS
1081 }
1082
c1d497be
KR
1083 first = XCAR (value);
1084 match_lisp = XCDR (value);
8ea151b2
RS
1085
1086 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1087 {
62abe9cb
SM
1088 insert_string ("invalid");
1089 return syntax;
8489eb67
RS
1090 }
1091
e704cb4b 1092 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1093 start1 = (XINT (first) >> 16) & 1;
1094 start2 = (XINT (first) >> 17) & 1;
1095 end1 = (XINT (first) >> 18) & 1;
1096 end2 = (XINT (first) >> 19) & 1;
1097 prefix = (XINT (first) >> 20) & 1;
1098 comstyle = (XINT (first) >> 21) & 1;
e6365855 1099 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1100
1101 if ((int) code < 0 || (int) code >= (int) Smax)
1102 {
1103 insert_string ("invalid");
62abe9cb 1104 return syntax;
8489eb67
RS
1105 }
1106 desc = syntax_code_spec[(int) code];
1107
1108 str[0] = desc, str[1] = 0;
1109 insert (str, 1);
1110
93da5fff
KH
1111 if (NILP (match_lisp))
1112 insert (" ", 1);
1113 else
1114 insert_char (XINT (match_lisp));
8489eb67 1115
8489eb67
RS
1116 if (start1)
1117 insert ("1", 1);
1118 if (start2)
1119 insert ("2", 1);
1120
1121 if (end1)
1122 insert ("3", 1);
1123 if (end2)
1124 insert ("4", 1);
1125
1126 if (prefix)
1127 insert ("p", 1);
e5d4f4dc
RS
1128 if (comstyle)
1129 insert ("b", 1);
e6365855
SM
1130 if (comnested)
1131 insert ("n", 1);
8489eb67
RS
1132
1133 insert_string ("\twhich means: ");
1134
0220c518 1135 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1136 {
1137 case Swhitespace:
1138 insert_string ("whitespace"); break;
1139 case Spunct:
1140 insert_string ("punctuation"); break;
1141 case Sword:
1142 insert_string ("word"); break;
1143 case Ssymbol:
1144 insert_string ("symbol"); break;
1145 case Sopen:
1146 insert_string ("open"); break;
1147 case Sclose:
1148 insert_string ("close"); break;
1149 case Squote:
d671382e 1150 insert_string ("prefix"); break;
8489eb67
RS
1151 case Sstring:
1152 insert_string ("string"); break;
1153 case Smath:
1154 insert_string ("math"); break;
1155 case Sescape:
1156 insert_string ("escape"); break;
1157 case Scharquote:
1158 insert_string ("charquote"); break;
1159 case Scomment:
1160 insert_string ("comment"); break;
1161 case Sendcomment:
1162 insert_string ("endcomment"); break;
d671382e
SM
1163 case Sinherit:
1164 insert_string ("inherit"); break;
1165 case Scomment_fence:
1166 insert_string ("comment fence"); break;
1167 case Sstring_fence:
1168 insert_string ("string fence"); break;
8489eb67
RS
1169 default:
1170 insert_string ("invalid");
62abe9cb 1171 return syntax;
8489eb67
RS
1172 }
1173
8ea151b2 1174 if (!NILP (match_lisp))
8489eb67
RS
1175 {
1176 insert_string (", matches ");
8ea151b2 1177 insert_char (XINT (match_lisp));
8489eb67
RS
1178 }
1179
1180 if (start1)
1181 insert_string (",\n\t is the first character of a comment-start sequence");
1182 if (start2)
1183 insert_string (",\n\t is the second character of a comment-start sequence");
1184
1185 if (end1)
1186 insert_string (",\n\t is the first character of a comment-end sequence");
1187 if (end2)
1188 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1189 if (comstyle)
1190 insert_string (" (comment style b)");
e6365855
SM
1191 if (comnested)
1192 insert_string (" (nestable)");
e5d4f4dc 1193
8489eb67
RS
1194 if (prefix)
1195 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1196
62abe9cb
SM
1197 return syntax;
1198}
8489eb67 1199\f
195d1361
RS
1200int parse_sexp_ignore_comments;
1201
8f924df7
KH
1202/* Char-table of functions that find the next or previous word
1203 boundary. */
1204Lisp_Object Vfind_word_boundary_function_table;
869bb237 1205
8489eb67
RS
1206/* Return the position across COUNT words from FROM.
1207 If that many words cannot be found before the end of the buffer, return 0.
1208 COUNT negative means scan backward and stop at word beginning. */
1209
dfcf069d 1210int
8489eb67
RS
1211scan_words (from, count)
1212 register int from, count;
1213{
1214 register int beg = BEGV;
1215 register int end = ZV;
6a140a74 1216 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1217 register enum syntaxcode code;
1218 int ch0, ch1;
8f924df7 1219 Lisp_Object func, script, pos;
8489eb67
RS
1220
1221 immediate_quit = 1;
1222 QUIT;
1223
195d1361
RS
1224 SETUP_SYNTAX_TABLE (from, count);
1225
8489eb67
RS
1226 while (count > 0)
1227 {
1228 while (1)
1229 {
1230 if (from == end)
1231 {
1232 immediate_quit = 0;
1233 return 0;
1234 }
195d1361 1235 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1236 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1237 code = SYNTAX (ch0);
6a140a74 1238 INC_BOTH (from, from_byte);
8489eb67
RS
1239 if (words_include_escapes
1240 && (code == Sescape || code == Scharquote))
1241 break;
1242 if (code == Sword)
1243 break;
8489eb67 1244 }
93da5fff
KH
1245 /* Now CH0 is a character which begins a word and FROM is the
1246 position of the next character. */
8f924df7 1247 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
869bb237 1248 if (! NILP (Ffboundp (func)))
8489eb67 1249 {
869bb237 1250 pos = call2 (func, make_number (from - 1), make_number (end));
8f924df7
KH
1251 if (INTEGERP (pos) && XINT (pos) > from)
1252 {
1253 from = XINT (pos);
1254 from_byte = CHAR_TO_BYTE (from);
1255 }
8489eb67 1256 }
869bb237 1257 else
8cb8232a 1258 {
8cb8232a
KH
1259 script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1260 while (1)
1261 {
1262 if (from == end) break;
1263 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 1264 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1265 code = SYNTAX (ch1);
1266 if ((code != Sword
1267 && (! words_include_escapes
1268 || (code != Sescape && code != Scharquote)))
1269 || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch1), script))
869bb237 1270 break;
8cb8232a
KH
1271 INC_BOTH (from, from_byte);
1272 ch0 = ch1;
1273 }
8489eb67
RS
1274 }
1275 count--;
1276 }
1277 while (count < 0)
1278 {
1279 while (1)
1280 {
1281 if (from == beg)
1282 {
1283 immediate_quit = 0;
1284 return 0;
1285 }
6a140a74 1286 DEC_BOTH (from, from_byte);
195d1361 1287 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 1288 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
93da5fff 1289 code = SYNTAX (ch1);
8489eb67
RS
1290 if (words_include_escapes
1291 && (code == Sescape || code == Scharquote))
1292 break;
1293 if (code == Sword)
1294 break;
8489eb67 1295 }
93da5fff
KH
1296 /* Now CH1 is a character which ends a word and FROM is the
1297 position of it. */
8f924df7 1298 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
869bb237 1299 if (! NILP (Ffboundp (func)))
8f924df7 1300 {
869bb237 1301 pos = call2 (func, make_number (from), make_number (beg));
8f924df7
KH
1302 if (INTEGERP (pos) && XINT (pos) < from)
1303 {
1304 from = XINT (pos);
1305 from_byte = CHAR_TO_BYTE (from);
1306 }
8489eb67 1307 }
869bb237 1308 else
8489eb67 1309 {
8cb8232a
KH
1310 script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1311 while (1)
1312 {
8cb8232a
KH
1313 if (from == beg)
1314 break;
262be72a 1315 DEC_BOTH (from, from_byte);
8cb8232a 1316 UPDATE_SYNTAX_TABLE_BACKWARD (from);
262be72a 1317 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8cb8232a
KH
1318 code = SYNTAX (ch0);
1319 if ((code != Sword
1320 && (! words_include_escapes
1321 || (code != Sescape && code != Scharquote)))
1322 || ! EQ (CHAR_TABLE_REF (Vchar_script_table, ch0), script))
262be72a
MB
1323 {
1324 INC_BOTH (from, from_byte);
1325 break;
1326 }
8cb8232a
KH
1327 ch1 = ch0;
1328 }
8489eb67
RS
1329 }
1330 count++;
1331 }
1332
1333 immediate_quit = 0;
1334
1335 return from;
1336}
1337
1e9dbb5f 1338DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
fdb82f93
PJ
1339 doc: /* Move point forward ARG words (backward if ARG is negative).
1340Normally returns t.
1341If an edge of the buffer or a field boundary is reached, point is left there
1342and the function returns nil. Field boundaries are not noticed if
1343`inhibit-field-text-motion' is non-nil. */)
839966f3
KH
1344 (arg)
1345 Lisp_Object arg;
8489eb67 1346{
c096ae4d 1347 Lisp_Object tmp;
2c6ea900 1348 int orig_val, val;
8489eb67 1349
839966f3
KH
1350 if (NILP (arg))
1351 XSETFASTINT (arg, 1);
1352 else
1353 CHECK_NUMBER (arg);
1354
1355 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1356 if (! orig_val)
839966f3 1357 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1358
b8855607 1359 /* Avoid jumping out of an input field. */
c096ae4d
SM
1360 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1361 Qt, Qnil, Qnil);
1362 val = XFASTINT (tmp);
7d0393cf 1363
8489eb67 1364 SET_PT (val);
e6d8341f 1365 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1366}
1367\f
195d1361
RS
1368Lisp_Object skip_chars ();
1369
1370DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1371 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1372STRING is like the inside of a `[...]' in a regular expression
1373except that `]' is never special and `\\' quotes `^', `-' or `\\'
1374 (but not as the end of a range; quoting is never needed there).
1375Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1376With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1377Char classes, e.g. `[:alpha:]', are supported.
1378
1379Returns the distance traveled, either zero or positive. */)
fdb82f93 1380 (string, lim)
195d1361
RS
1381 Lisp_Object string, lim;
1382{
327719ee 1383 return skip_chars (1, string, lim, 1);
195d1361
RS
1384}
1385
1386DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1387 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1388See `skip-chars-forward' for details.
1389Returns the distance traveled, either zero or negative. */)
1390 (string, lim)
195d1361
RS
1391 Lisp_Object string, lim;
1392{
327719ee 1393 return skip_chars (0, string, lim, 1);
195d1361
RS
1394}
1395
1396DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1397 doc: /* Move point forward across chars in specified syntax classes.
1398SYNTAX is a string of syntax code characters.
1399Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1400If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1401This function returns the distance traveled, either zero or positive. */)
1402 (syntax, lim)
195d1361
RS
1403 Lisp_Object syntax, lim;
1404{
b7dbcc19 1405 return skip_syntaxes (1, syntax, lim);
195d1361
RS
1406}
1407
1408DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1409 doc: /* Move point backward across chars in specified syntax classes.
1410SYNTAX is a string of syntax code characters.
1411Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1412If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1413This function returns the distance traveled, either zero or negative. */)
1414 (syntax, lim)
195d1361
RS
1415 Lisp_Object syntax, lim;
1416{
b7dbcc19 1417 return skip_syntaxes (0, syntax, lim);
195d1361
RS
1418}
1419
6a140a74 1420static Lisp_Object
327719ee 1421skip_chars (forwardp, string, lim, handle_iso_classes)
b7dbcc19 1422 int forwardp;
195d1361 1423 Lisp_Object string, lim;
a1bc88d4 1424 int handle_iso_classes;
195d1361 1425{
195d1361
RS
1426 register unsigned int c;
1427 unsigned char fastmap[0400];
b7dbcc19 1428 /* Store the ranges of non-ASCII characters. */
5df4982e 1429 int *char_ranges;
7e68b0ea 1430 int n_char_ranges = 0;
195d1361 1431 int negate = 0;
4101e6fe 1432 register int i, i_byte;
b7dbcc19
KH
1433 /* Set to 1 if the current buffer is multibyte and the region
1434 contains non-ASCII chars. */
1435 int multibyte;
1436 /* Set to 1 if STRING is multibyte and it contains non-ASCII
1437 chars. */
1674d9a2
RS
1438 int string_multibyte;
1439 int size_byte;
2e567bd3 1440 const unsigned char *str;
82d497fc 1441 int len;
a1bc88d4 1442 Lisp_Object iso_classes;
195d1361 1443
b7826503 1444 CHECK_STRING (string);
a1bc88d4 1445 iso_classes = Qnil;
82d497fc 1446
195d1361
RS
1447 if (NILP (lim))
1448 XSETINT (lim, forwardp ? ZV : BEGV);
1449 else
b7826503 1450 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1451
1452 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1453 if (XINT (lim) > ZV)
1454 XSETFASTINT (lim, ZV);
1455 if (XINT (lim) < BEGV)
1456 XSETFASTINT (lim, BEGV);
1457
b7dbcc19 1458 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
92eaa22e 1459 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
8f924df7 1460 string_multibyte = SBYTES (string) > SCHARS (string);
b7dbcc19 1461
195d1361
RS
1462 bzero (fastmap, sizeof fastmap);
1463
8f924df7
KH
1464 str = SDATA (string);
1465 size_byte = SBYTES (string);
4101e6fe 1466
82d497fc 1467 i_byte = 0;
13090112 1468 if (i_byte < size_byte
d5db4077 1469 && SREF (string, 0) == '^')
195d1361 1470 {
82d497fc 1471 negate = 1; i_byte++;
195d1361
RS
1472 }
1473
1474 /* Find the characters specified and set their elements of fastmap.
b7dbcc19 1475 Handle backslashes and ranges specially.
195d1361 1476
b7dbcc19
KH
1477 If STRING contains non-ASCII characters, setup char_ranges for
1478 them and use fastmap only for their leading codes. */
4101e6fe 1479
b7dbcc19 1480 if (! string_multibyte)
195d1361 1481 {
b7dbcc19 1482 int string_has_eight_bit = 0;
4101e6fe 1483
b7dbcc19
KH
1484 /* At first setup fastmap. */
1485 while (i_byte < size_byte)
1486 {
1487 c = str[i_byte++];
1488
a1bc88d4
RS
1489 if (handle_iso_classes && c == '['
1490 && i_byte < size_byte
327719ee 1491 && str[i_byte] == ':')
a1bc88d4
RS
1492 {
1493 const unsigned char *class_beg = str + i_byte + 1;
1494 const unsigned char *class_end = class_beg;
b3bda4fd 1495 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1496 /* Leave room for the null. */
a1bc88d4
RS
1497 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1498 re_wctype_t cc;
1499
1500 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1501 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1502
b3bda4fd
KS
1503 while (class_end < class_limit
1504 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1505 class_end++;
1506
b3bda4fd
KS
1507 if (class_end == class_beg
1508 || *class_end != ':' || class_end[1] != ']')
1509 goto not_a_class_name;
a1bc88d4
RS
1510
1511 bcopy (class_beg, class_name, class_end - class_beg);
1512 class_name[class_end - class_beg] = 0;
1513
1514 cc = re_wctype (class_name);
1515 if (cc == 0)
1516 error ("Invalid ISO C character class");
1517
1518 iso_classes = Fcons (make_number (cc), iso_classes);
1519
1520 i_byte = class_end + 2 - str;
1521 continue;
1522 }
1523
b3bda4fd 1524 not_a_class_name:
b7dbcc19
KH
1525 if (c == '\\')
1526 {
13090112 1527 if (i_byte == size_byte)
4101e6fe
RS
1528 break;
1529
5c7b02ab 1530 c = str[i_byte++];
195d1361 1531 }
839966f3
KH
1532 /* Treat `-' as range character only if another character
1533 follows. */
1534 if (i_byte + 1 < size_byte
82d497fc 1535 && str[i_byte] == '-')
195d1361 1536 {
9690d026 1537 unsigned int c2;
4101e6fe
RS
1538
1539 /* Skip over the dash. */
82d497fc 1540 i_byte++;
4101e6fe 1541
4101e6fe 1542 /* Get the end of the range. */
5c7b02ab 1543 c2 = str[i_byte++];
b7dbcc19
KH
1544 if (c2 == '\\'
1545 && i_byte < size_byte)
1546 c2 = str[i_byte++];
1547
8f924df7
KH
1548 if (c <= c2)
1549 {
1550 while (c <= c2)
1551 fastmap[c++] = 1;
1552 if (! ASCII_CHAR_P (c2))
1553 string_has_eight_bit = 1;
1554 }
195d1361
RS
1555 }
1556 else
b7dbcc19
KH
1557 {
1558 fastmap[c] = 1;
1559 if (! ASCII_CHAR_P (c))
1560 string_has_eight_bit = 1;
1561 }
1562 }
1563
1564 /* If the current range is multibyte and STRING contains
1565 eight-bit chars, arrange fastmap and setup char_ranges for
1566 the corresponding multibyte chars. */
1567 if (multibyte && string_has_eight_bit)
1568 {
1569 unsigned char fastmap2[0400];
1570 int range_start_byte, range_start_char;
1571
1572 bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
1573 bzero (fastmap + 0200, 0200);
1574 /* We are sure that this loop stops. */
1575 for (i = 0200; ! fastmap2[i]; i++);
1576 c = unibyte_char_to_multibyte (i);
1577 fastmap[CHAR_LEADING_CODE (c)] = 1;
1578 range_start_byte = i;
1579 range_start_char = c;
f003ca54 1580 char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
b7dbcc19
KH
1581 for (i = 129; i < 0400; i++)
1582 {
1583 c = unibyte_char_to_multibyte (i);
1584 fastmap[CHAR_LEADING_CODE (c)] = 1;
1585 if (i - range_start_byte != c - range_start_char)
1586 {
1587 char_ranges[n_char_ranges++] = range_start_char;
1588 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1589 + range_start_char);
1590 range_start_byte = i;
1591 range_start_char = c;
8f924df7 1592 }
b7dbcc19
KH
1593 }
1594 char_ranges[n_char_ranges++] = range_start_char;
1595 char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1596 + range_start_char);
195d1361
RS
1597 }
1598 }
f003ca54 1599 else /* STRING is multibyte */
b7dbcc19 1600 {
f003ca54
KH
1601 char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1602
b7dbcc19 1603 while (i_byte < size_byte)
195d1361 1604 {
b7dbcc19 1605 unsigned char leading_code;
5c7b02ab 1606
b7dbcc19
KH
1607 leading_code = str[i_byte];
1608 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1609 i_byte += len;
5c7b02ab 1610
327719ee
MB
1611 if (handle_iso_classes && c == '['
1612 && i_byte < size_byte
1613 && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
1614 {
1615 const unsigned char *class_beg = str + i_byte + 1;
1616 const unsigned char *class_end = class_beg;
1617 const unsigned char *class_limit = str + size_byte - 2;
1618 /* Leave room for the null. */
1619 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1620 re_wctype_t cc;
1621
1622 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1623 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1624
1625 while (class_end < class_limit
1626 && *class_end >= 'a' && *class_end <= 'z')
1627 class_end++;
1628
1629 if (class_end == class_beg
1630 || *class_end != ':' || class_end[1] != ']')
1631 goto not_a_class_name_multibyte;
1632
1633 bcopy (class_beg, class_name, class_end - class_beg);
1634 class_name[class_end - class_beg] = 0;
1635
1636 cc = re_wctype (class_name);
1637 if (cc == 0)
1638 error ("Invalid ISO C character class");
1639
1640 iso_classes = Fcons (make_number (cc), iso_classes);
1641
1642 i_byte = class_end + 2 - str;
1643 continue;
1644 }
1645
1646 not_a_class_name_multibyte:
195d1361
RS
1647 if (c == '\\')
1648 {
13090112 1649 if (i_byte == size_byte)
4101e6fe
RS
1650 break;
1651
b7dbcc19 1652 leading_code = str[i_byte];
839966f3
KH
1653 c = STRING_CHAR_AND_LENGTH (str + i_byte,
1654 size_byte - i_byte, len);
82d497fc 1655 i_byte += len;
195d1361 1656 }
839966f3
KH
1657 /* Treat `-' as range character only if another character
1658 follows. */
1659 if (i_byte + 1 < size_byte
82d497fc 1660 && str[i_byte] == '-')
195d1361 1661 {
9690d026 1662 unsigned int c2;
b7dbcc19 1663 unsigned char leading_code2;
4101e6fe
RS
1664
1665 /* Skip over the dash. */
82d497fc 1666 i_byte++;
4101e6fe 1667
4101e6fe 1668 /* Get the end of the range. */
b7dbcc19 1669 leading_code2 = str[i_byte];
839966f3
KH
1670 c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1671 size_byte - i_byte, len);
82d497fc 1672 i_byte += len;
7e68b0ea 1673
b7dbcc19
KH
1674 if (c2 == '\\'
1675 && i_byte < size_byte)
1676 {
1677 leading_code2 = str[i_byte];
1678 c2 =STRING_CHAR_AND_LENGTH (str + i_byte, size_byte-i_byte, len);
1679 i_byte += len;
1680 }
1681
f003ca54
KH
1682 if (c > c2)
1683 continue;
b7dbcc19 1684 if (ASCII_CHAR_P (c))
06274af5 1685 {
5c7b02ab
KH
1686 while (c <= c2 && c < 0x80)
1687 fastmap[c++] = 1;
b7dbcc19
KH
1688 leading_code = CHAR_LEADING_CODE (c);
1689 }
1690 if (! ASCII_CHAR_P (c))
1691 {
1692 while (leading_code <= leading_code2)
1693 fastmap[leading_code++] = 1;
1694 if (c <= c2)
e39091b6 1695 {
b7dbcc19 1696 char_ranges[n_char_ranges++] = c;
e39091b6 1697 char_ranges[n_char_ranges++] = c2;
06274af5
KH
1698 }
1699 }
195d1361
RS
1700 }
1701 else
7e68b0ea 1702 {
b7dbcc19 1703 if (ASCII_CHAR_P (c))
e39091b6
KH
1704 fastmap[c] = 1;
1705 else
7e68b0ea 1706 {
b7dbcc19 1707 fastmap[leading_code] = 1;
4101e6fe
RS
1708 char_ranges[n_char_ranges++] = c;
1709 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1710 }
1711 }
195d1361 1712 }
b7dbcc19
KH
1713
1714 /* If the current range is unibyte and STRING contains non-ASCII
1715 chars, arrange fastmap for the corresponding unibyte
1716 chars. */
1717
1718 if (! multibyte && n_char_ranges > 0)
1719 {
1720 bzero (fastmap + 0200, 0200);
1721 for (i = 0; i < n_char_ranges; i += 2)
1722 {
1723 int c1 = char_ranges[i];
1724 int c2 = char_ranges[i + 1];
1725
1726 for (; c1 <= c2; c1++)
2afc21f5
SM
1727 {
1728 int b = CHAR_TO_BYTE_SAFE (c1);
1729 if (b >= 0)
1730 fastmap[b] = 1;
1731 }
b7dbcc19
KH
1732 }
1733 }
195d1361
RS
1734 }
1735
9690d026 1736 /* If ^ was the first character, complement the fastmap. */
195d1361 1737 if (negate)
b7dbcc19
KH
1738 {
1739 if (! multibyte)
1740 for (i = 0; i < sizeof fastmap; i++)
1741 fastmap[i] ^= 1;
1742 else
1743 {
1744 for (i = 0; i < 0200; i++)
1745 fastmap[i] ^= 1;
1746 /* All non-ASCII chars possibly match. */
1747 for (; i < sizeof fastmap; i++)
1748 fastmap[i] = 1;
1749 }
1750 }
195d1361
RS
1751
1752 {
1753 int start_point = PT;
1754 int pos = PT;
6a140a74 1755 int pos_byte = PT_BYTE;
9af7511a
KH
1756 unsigned char *p = PT_ADDR, *endp, *stop;
1757
1758 if (forwardp)
1759 {
1760 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1761 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1762 }
1763 else
1764 {
1765 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1766 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1767 }
195d1361
RS
1768
1769 immediate_quit = 1;
b7dbcc19 1770 if (forwardp)
195d1361 1771 {
b7dbcc19 1772 if (multibyte)
8f924df7 1773 while (1)
b7dbcc19 1774 {
8f924df7 1775 int nbytes;
9af7511a 1776
8f924df7 1777 if (p >= stop)
9af7511a 1778 {
8f924df7 1779 if (p >= endp)
9af7511a 1780 break;
8f924df7
KH
1781 p = GAP_END_ADDR;
1782 stop = endp;
9af7511a 1783 }
327719ee
MB
1784 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1785 if (! NILP (iso_classes) && in_classes (c, iso_classes))
9af7511a 1786 {
327719ee 1787 if (negate)
9af7511a 1788 break;
327719ee
MB
1789 else
1790 goto fwd_ok;
9af7511a 1791 }
9af7511a 1792
8f924df7 1793 if (! fastmap[*p])
b7dbcc19
KH
1794 break;
1795 if (! ASCII_CHAR_P (c))
9af7511a 1796 {
b7dbcc19
KH
1797 /* As we are looking at a multibyte character, we
1798 must look up the character in the table
1799 CHAR_RANGES. If there's no data in the table,
1800 that character is not what we want to skip. */
1801
1802 /* The following code do the right thing even if
1803 n_char_ranges is zero (i.e. no data in
1804 CHAR_RANGES). */
1805 for (i = 0; i < n_char_ranges; i += 2)
1806 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
9af7511a 1807 break;
b7dbcc19
KH
1808 if (!(negate ^ (i < n_char_ranges)))
1809 break;
9af7511a 1810 }
327719ee 1811 fwd_ok:
8f924df7 1812 p += nbytes, pos++, pos_byte += nbytes;
b7dbcc19
KH
1813 }
1814 else
8f924df7
KH
1815 while (1)
1816 {
1817 if (p >= stop)
9af7511a 1818 {
8f924df7 1819 if (p >= endp)
9af7511a 1820 break;
8f924df7
KH
1821 p = GAP_END_ADDR;
1822 stop = endp;
9af7511a 1823 }
327719ee
MB
1824
1825 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1826 {
1827 if (negate)
1828 break;
1829 else
1830 goto fwd_unibyte_ok;
1831 }
1832
8f924df7
KH
1833 if (!fastmap[*p])
1834 break;
327719ee 1835 fwd_unibyte_ok:
8f924df7
KH
1836 p++, pos++, pos_byte++;
1837 }
195d1361
RS
1838 }
1839 else
1840 {
b7dbcc19 1841 if (multibyte)
8f924df7 1842 while (1)
b7dbcc19 1843 {
8f924df7 1844 unsigned char *prev_p;
9af7511a 1845
8f924df7 1846 if (p <= stop)
9af7511a 1847 {
8f924df7 1848 if (p <= endp)
9af7511a 1849 break;
8f924df7
KH
1850 p = GPT_ADDR;
1851 stop = endp;
9af7511a 1852 }
8f924df7
KH
1853 prev_p = p;
1854 while (--p >= stop && ! CHAR_HEAD_P (*p));
327719ee 1855 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
a1bc88d4 1856
327719ee
MB
1857 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1858 {
1859 if (negate)
1860 break;
9690d026 1861 else
327719ee 1862 goto back_ok;
7e68b0ea 1863 }
a1bc88d4 1864
8f924df7 1865 if (! fastmap[*p])
b7dbcc19
KH
1866 break;
1867 if (! ASCII_CHAR_P (c))
7e68b0ea 1868 {
b7dbcc19
KH
1869 /* See the comment in the previous similar code. */
1870 for (i = 0; i < n_char_ranges; i += 2)
1871 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1872 break;
1873 if (!(negate ^ (i < n_char_ranges)))
1874 break;
7e68b0ea 1875 }
327719ee 1876 back_ok:
8f924df7 1877 pos--, pos_byte -= prev_p - p;
b7dbcc19
KH
1878 }
1879 else
8f924df7
KH
1880 while (1)
1881 {
1882 if (p <= stop)
9af7511a 1883 {
8f924df7 1884 if (p <= endp)
9af7511a 1885 break;
8f924df7
KH
1886 p = GPT_ADDR;
1887 stop = endp;
9af7511a 1888 }
e39091b6 1889
327719ee
MB
1890 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1891 {
1892 if (negate)
1893 break;
9af7511a 1894 else
327719ee
MB
1895 goto back_unibyte_ok;
1896 }
a1bc88d4 1897
8f924df7
KH
1898 if (!fastmap[p[-1]])
1899 break;
327719ee 1900 back_unibyte_ok:
8f924df7
KH
1901 p--, pos--, pos_byte--;
1902 }
195d1361 1903 }
7e68b0ea 1904
b7dbcc19
KH
1905 SET_PT_BOTH (pos, pos_byte);
1906 immediate_quit = 0;
1907
1908 return make_number (PT - start_point);
1909 }
1910}
7e68b0ea 1911
b7dbcc19
KH
1912
1913static Lisp_Object
1914skip_syntaxes (forwardp, string, lim)
1915 int forwardp;
1916 Lisp_Object string, lim;
1917{
1918 register unsigned int c;
1919 unsigned char fastmap[0400];
1920 int negate = 0;
1921 register int i, i_byte;
1922 int multibyte;
1923 int size_byte;
1924 unsigned char *str;
1925
1926 CHECK_STRING (string);
1927
1928 if (NILP (lim))
1929 XSETINT (lim, forwardp ? ZV : BEGV);
1930 else
1931 CHECK_NUMBER_COERCE_MARKER (lim);
1932
1933 /* In any case, don't allow scan outside bounds of buffer. */
1934 if (XINT (lim) > ZV)
1935 XSETFASTINT (lim, ZV);
1936 if (XINT (lim) < BEGV)
1937 XSETFASTINT (lim, BEGV);
1938
ade8ee9e 1939 if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
49feb1cd 1940 return make_number (0);
8c6e735b 1941
b7dbcc19 1942 multibyte = (!NILP (current_buffer->enable_multibyte_characters)
92eaa22e 1943 && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
b7dbcc19
KH
1944
1945 bzero (fastmap, sizeof fastmap);
1946
8f924df7
KH
1947 if (SBYTES (string) > SCHARS (string))
1948 /* As this is very rare case (syntax spec is ASCII only), don't
1949 consider efficiency. */
bc796a59
KH
1950 string = string_make_unibyte (string);
1951
8f924df7
KH
1952 str = SDATA (string);
1953 size_byte = SBYTES (string);
bc796a59 1954
b7dbcc19
KH
1955 i_byte = 0;
1956 if (i_byte < size_byte
8f924df7 1957 && SREF (string, 0) == '^')
b7dbcc19
KH
1958 {
1959 negate = 1; i_byte++;
1960 }
1961
b7dbcc19
KH
1962 /* Find the syntaxes specified and set their elements of fastmap. */
1963
1964 while (i_byte < size_byte)
1965 {
1966 c = str[i_byte++];
8f924df7 1967 fastmap[syntax_spec_code[c]] = 1;
b7dbcc19 1968 }
195d1361 1969
9690d026 1970 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1971 if (negate)
1972 for (i = 0; i < sizeof fastmap; i++)
9690d026 1973 fastmap[i] ^= 1;
195d1361
RS
1974
1975 {
1976 int start_point = PT;
1977 int pos = PT;
6a140a74 1978 int pos_byte = PT_BYTE;
8f924df7
KH
1979 unsigned char *p = PT_ADDR, *endp, *stop;
1980
1981 if (forwardp)
1982 {
1983 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1984 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1985 }
1986 else
1987 {
1988 endp = CHAR_POS_ADDR (XINT (lim));
1989 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1990 }
195d1361
RS
1991
1992 immediate_quit = 1;
b7dbcc19
KH
1993 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1994 if (forwardp)
195d1361 1995 {
b7dbcc19 1996 if (multibyte)
195d1361 1997 {
8f924df7 1998 while (1)
8c6e735b 1999 {
8f924df7
KH
2000 int nbytes;
2001
2002 if (p >= stop)
2003 {
2004 if (p >= endp)
2005 break;
2006 p = GAP_END_ADDR;
2007 stop = endp;
2008 }
2009 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
2010 if (! fastmap[(int) SYNTAX (c)])
8c6e735b 2011 break;
8f924df7 2012 p += nbytes, pos++, pos_byte += nbytes;
8c6e735b
KH
2013 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2014 }
195d1361
RS
2015 }
2016 else
2017 {
8c6e735b 2018 while (1)
7e68b0ea 2019 {
8f924df7
KH
2020 if (p >= stop)
2021 {
2022 if (p >= endp)
2023 break;
2024 p = GAP_END_ADDR;
2025 stop = endp;
2026 }
2027 if (! fastmap[(int) SYNTAX (*p)])
8c6e735b 2028 break;
8f924df7 2029 p++, pos++, pos_byte++;
b7dbcc19 2030 UPDATE_SYNTAX_TABLE_FORWARD (pos);
195d1361
RS
2031 }
2032 }
2033 }
2034 else
2035 {
b7dbcc19 2036 if (multibyte)
195d1361 2037 {
8c6e735b 2038 while (1)
b7dbcc19 2039 {
8f924df7
KH
2040 unsigned char *prev_p;
2041
2042 if (p <= stop)
b7dbcc19 2043 {
8f924df7
KH
2044 if (p <= endp)
2045 break;
2046 p = GPT_ADDR;
2047 stop = endp;
b7dbcc19 2048 }
262be72a 2049 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7
KH
2050 prev_p = p;
2051 while (--p >= stop && ! CHAR_HEAD_P (*p));
2052 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
2053 if (! fastmap[(int) SYNTAX (c)])
2054 break;
2055 pos--, pos_byte -= prev_p - p;
b7dbcc19 2056 }
195d1361
RS
2057 }
2058 else
2059 {
8c6e735b
KH
2060 while (1)
2061 {
8f924df7
KH
2062 if (p <= stop)
2063 {
2064 if (p <= endp)
2065 break;
2066 p = GPT_ADDR;
2067 stop = endp;
2068 }
102f6132 2069 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
8f924df7 2070 if (! fastmap[(int) SYNTAX (p[-1])])
8c6e735b 2071 break;
8f924df7 2072 p--, pos--, pos_byte--;
8c6e735b 2073 }
195d1361
RS
2074 }
2075 }
6a140a74
RS
2076
2077 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
2078 immediate_quit = 0;
2079
2080 return make_number (PT - start_point);
2081 }
2082}
a1bc88d4
RS
2083
2084/* Return 1 if character C belongs to one of the ISO classes
2085 in the list ISO_CLASSES. Each class is represented by an
2086 integer which is its type according to re_wctype. */
2087
2088static int
2089in_classes (c, iso_classes)
2090 int c;
2091 Lisp_Object iso_classes;
2092{
2093 int fits_class = 0;
2094
2095 while (! NILP (iso_classes))
2096 {
2097 Lisp_Object elt;
2098 elt = XCAR (iso_classes);
2099 iso_classes = XCDR (iso_classes);
2100
2101 if (re_iswctype (c, XFASTINT (elt)))
2102 fits_class = 1;
2103 }
2104
2105 return fits_class;
2106}
195d1361 2107\f
95ff8dfc
RS
2108/* Jump over a comment, assuming we are at the beginning of one.
2109 FROM is the current position.
2110 FROM_BYTE is the bytepos corresponding to FROM.
2111 Do not move past STOP (a charpos).
2112 The comment over which we have to jump is of style STYLE
2113 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2114 NESTING should be positive to indicate the nesting at the beginning
2115 for nested comments and should be zero or negative else.
2116 ST_COMMENT_STYLE cannot be nested.
2117 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2118 (or 0 If the search cannot start in the middle of a two-character).
2119
2120 If successful, return 1 and store the charpos of the comment's end
2121 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2122 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2123 corresponding bytepos into *BYTEPOS_PTR and the current nesting
2124 (as defined for state.incomment) in *INCOMMENT_PTR.
2125
2126 The comment end is the last character of the comment rather than the
2127 character just after the comment.
2128
2129 Global syntax data is assumed to initially be valid for FROM and
2130 remains valid for forward search starting at the returned position. */
2131
2132static int
2133forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
2134 charpos_ptr, bytepos_ptr, incomment_ptr)
a0aa7fcf 2135 EMACS_INT from, from_byte, stop;
95ff8dfc 2136 int nesting, style, prev_syntax;
a0aa7fcf
SM
2137 EMACS_INT *charpos_ptr, *bytepos_ptr;
2138 int *incomment_ptr;
95ff8dfc
RS
2139{
2140 register int c, c1;
2141 register enum syntaxcode code;
2142 register int syntax;
2143
2144 if (nesting <= 0) nesting = -1;
2145
2146 /* Enter the loop in the middle so that we find
2147 a 2-char comment ender if we start in the middle of it. */
2148 syntax = prev_syntax;
2149 if (syntax != 0) goto forw_incomment;
2150
2151 while (1)
2152 {
2153 if (from == stop)
2154 {
2155 *incomment_ptr = nesting;
2156 *charpos_ptr = from;
2157 *bytepos_ptr = from_byte;
2158 return 0;
2159 }
8f924df7 2160 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2161 syntax = SYNTAX_WITH_FLAGS (c);
2162 code = syntax & 0xff;
2163 if (code == Sendcomment
2164 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
2165 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2166 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
2167 /* we have encountered a comment end of the same style
2168 as the comment sequence which began this comment
2169 section */
2170 break;
2171 if (code == Scomment_fence
2172 && style == ST_COMMENT_STYLE)
2173 /* we have encountered a comment end of the same style
2174 as the comment sequence which began this comment
2175 section. */
2176 break;
2177 if (nesting > 0
2178 && code == Scomment
abf8a9ff 2179 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
2180 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
2181 /* we have encountered a nested comment of the same style
2182 as the comment sequence which began this comment section */
2183 nesting++;
2184 INC_BOTH (from, from_byte);
2185 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 2186
95ff8dfc
RS
2187 forw_incomment:
2188 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2189 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
8f924df7 2190 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
abf8a9ff
SM
2191 SYNTAX_COMEND_SECOND (c1))
2192 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2193 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
2194 {
2195 if (--nesting <= 0)
2196 /* we have encountered a comment end of the same style
2197 as the comment sequence which began this comment
2198 section */
2199 break;
2200 else
2201 {
2202 INC_BOTH (from, from_byte);
2203 UPDATE_SYNTAX_TABLE_FORWARD (from);
2204 }
2205 }
95ff8dfc
RS
2206 if (nesting > 0
2207 && from < stop
2208 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
8f924df7 2209 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
95ff8dfc 2210 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
2211 && SYNTAX_COMSTART_SECOND (c1))
2212 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2213 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
2214 /* we have encountered a nested comment of the same style
2215 as the comment sequence which began this comment
2216 section */
2217 {
2218 INC_BOTH (from, from_byte);
2219 UPDATE_SYNTAX_TABLE_FORWARD (from);
2220 nesting++;
2221 }
2222 }
2223 *charpos_ptr = from;
2224 *bytepos_ptr = from_byte;
2225 return 1;
2226}
2227
b3cfe0c8 2228DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 2229 doc: /*
ee648c11 2230Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
2231Stop scanning if we find something other than a comment or whitespace.
2232Set point to where scanning stops.
ee648c11 2233If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
2234between them, return t; otherwise return nil. */)
2235 (count)
840f481c 2236 Lisp_Object count;
b3cfe0c8 2237{
2312c580
SM
2238 register EMACS_INT from;
2239 EMACS_INT from_byte;
2240 register EMACS_INT stop;
8ea151b2 2241 register int c, c1;
b3cfe0c8
RS
2242 register enum syntaxcode code;
2243 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2244 int comnested = 0; /* whether the comment is nestable or not */
be720845 2245 int found;
2312c580
SM
2246 EMACS_INT count1;
2247 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2248 int dummy;
840f481c 2249
b7826503 2250 CHECK_NUMBER (count);
840f481c 2251 count1 = XINT (count);
195d1361 2252 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2253
2254 immediate_quit = 1;
2255 QUIT;
2256
2257 from = PT;
6a140a74 2258 from_byte = PT_BYTE;
b3cfe0c8 2259
195d1361 2260 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2261 while (count1 > 0)
b3cfe0c8 2262 {
04882296 2263 do
b3cfe0c8 2264 {
f902a008
RS
2265 int comstart_first;
2266
04882296
KH
2267 if (from == stop)
2268 {
ef316cf0 2269 SET_PT_BOTH (from, from_byte);
b7e6e612 2270 immediate_quit = 0;
04882296
KH
2271 return Qnil;
2272 }
b7dbcc19 2273 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
b3cfe0c8 2274 code = SYNTAX (c);
f902a008 2275 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2276 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2277 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2278 INC_BOTH (from, from_byte);
f902a008 2279 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2280 if (from < stop && comstart_first
b7dbcc19 2281 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
8ea151b2 2282 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2283 {
7d0393cf 2284 /* We have encountered a comment start sequence and we
7fc8191e 2285 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2286 the comment style this sequence begins so that later,
2287 only a comment end of the same style actually ends
7fc8191e 2288 the comment section. */
b3cfe0c8 2289 code = Scomment;
8ea151b2 2290 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2291 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2292 INC_BOTH (from, from_byte);
95ff8dfc 2293 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2294 }
04882296 2295 }
abf8a9ff 2296 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2297
da71ab91
KH
2298 if (code == Scomment_fence)
2299 comstyle = ST_COMMENT_STYLE;
2300 else if (code != Scomment)
04882296
KH
2301 {
2302 immediate_quit = 0;
6a140a74 2303 DEC_BOTH (from, from_byte);
ef316cf0 2304 SET_PT_BOTH (from, from_byte);
04882296
KH
2305 return Qnil;
2306 }
2307 /* We're at the start of a comment. */
95ff8dfc
RS
2308 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2309 &out_charpos, &out_bytepos, &dummy);
2310 from = out_charpos; from_byte = out_bytepos;
2311 if (!found)
04882296 2312 {
95ff8dfc
RS
2313 immediate_quit = 0;
2314 SET_PT_BOTH (from, from_byte);
2315 return Qnil;
b3cfe0c8 2316 }
95ff8dfc
RS
2317 INC_BOTH (from, from_byte);
2318 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2319 /* We have skipped one comment. */
840f481c 2320 count1--;
b3cfe0c8
RS
2321 }
2322
840f481c 2323 while (count1 < 0)
b3cfe0c8 2324 {
b9145dbb 2325 while (1)
b3cfe0c8 2326 {
c65adb44 2327 int quoted;
f902a008 2328
b9145dbb
RS
2329 if (from <= stop)
2330 {
6a140a74 2331 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2332 immediate_quit = 0;
2333 return Qnil;
2334 }
b3cfe0c8 2335
6a140a74 2336 DEC_BOTH (from, from_byte);
f902a008 2337 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2338 quoted = char_quoted (from, from_byte);
b7dbcc19 2339 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
b3cfe0c8
RS
2340 code = SYNTAX (c);
2341 comstyle = 0;
95ff8dfc 2342 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2343 if (code == Sendcomment)
2344 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2345 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2346 && prev_char_comend_first (from, from_byte)
89c6809a 2347 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2348 {
7fc8191e 2349 /* We must record the comment style encountered so that
b3cfe0c8 2350 later, we can match only the proper comment begin
7fc8191e 2351 sequence of the same style. */
6a140a74 2352 DEC_BOTH (from, from_byte);
f902a008
RS
2353 code = Sendcomment;
2354 /* Calling char_quoted, above, set up global syntax position
2355 at the new value of FROM. */
b7dbcc19 2356 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2357 comstyle = SYNTAX_COMMENT_STYLE (c1);
2358 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2359 }
2360
195d1361
RS
2361 if (code == Scomment_fence)
2362 {
2363 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2364 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2365
6a140a74 2366 while (1)
195d1361 2367 {
6a140a74 2368 DEC_BOTH (from, from_byte);
195d1361 2369 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2370 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
6a140a74 2371 if (SYNTAX (c) == Scomment_fence
7d0393cf 2372 && !char_quoted (from, from_byte))
195d1361 2373 {
7d0393cf 2374 found = 1;
195d1361
RS
2375 break;
2376 }
fcdd4585
SM
2377 else if (from == stop)
2378 break;
195d1361
RS
2379 }
2380 if (found == 0)
2381 {
2382 from = ini; /* Set point to ini + 1. */
6a140a74 2383 from_byte = ini_byte;
195d1361
RS
2384 goto leave;
2385 }
39c41ad4
SM
2386 else
2387 /* We have skipped one comment. */
2388 break;
195d1361
RS
2389 }
2390 else if (code == Sendcomment)
b3cfe0c8 2391 {
95ff8dfc 2392 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2393 &out_charpos, &out_bytepos);
1aa963c8
SM
2394 if (found == -1)
2395 {
abf8a9ff
SM
2396 if (c == '\n')
2397 /* This end-of-line is not an end-of-comment.
2398 Treat it like a whitespace.
2399 CC-mode (and maybe others) relies on this behavior. */
2400 ;
2401 else
2402 {
2403 /* Failure: we should go back to the end of this
2404 not-quite-endcomment. */
2405 if (SYNTAX(c) != code)
2406 /* It was a two-char Sendcomment. */
2407 INC_BOTH (from, from_byte);
2408 goto leave;
2409 }
1aa963c8 2410 }
2b927d02 2411 else
abf8a9ff
SM
2412 {
2413 /* We have skipped one comment. */
2414 from = out_charpos, from_byte = out_bytepos;
2415 break;
2416 }
b3cfe0c8 2417 }
bb0de084 2418 else if (code != Swhitespace || quoted)
b3cfe0c8 2419 {
195d1361 2420 leave:
b3cfe0c8 2421 immediate_quit = 0;
6a140a74 2422 INC_BOTH (from, from_byte);
ef316cf0 2423 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2424 return Qnil;
2425 }
2426 }
2427
840f481c 2428 count1++;
b3cfe0c8
RS
2429 }
2430
ef316cf0 2431 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2432 immediate_quit = 0;
2433 return Qt;
2434}
2435\f
b7dbcc19 2436/* Return syntax code of character C if C is an ASCII character
db9f48c3 2437 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08 2438
b7dbcc19
KH
2439#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2440 ((ASCII_CHAR_P (c) || !multibyte_symbol_p) \
bd25db08
KH
2441 ? SYNTAX (c) : Ssymbol)
2442
6a140a74 2443static Lisp_Object
8489eb67 2444scan_lists (from, count, depth, sexpflag)
932e6895
SM
2445 register EMACS_INT from;
2446 EMACS_INT count, depth;
2447 int sexpflag;
8489eb67
RS
2448{
2449 Lisp_Object val;
932e6895 2450 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2451 register int c, c1;
2452 int stringterm;
8489eb67
RS
2453 int quoted;
2454 int mathexit = 0;
93da5fff 2455 register enum syntaxcode code, temp_code;
195d1361 2456 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2457 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2458 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2459 EMACS_INT temp_pos;
2460 EMACS_INT last_good = from;
195d1361 2461 int found;
932e6895
SM
2462 EMACS_INT from_byte;
2463 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2464 int temp, dummy;
bd25db08 2465 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2466
2467 if (depth > 0) min_depth = 0;
2468
c3907a7e
RS
2469 if (from > ZV) from = ZV;
2470 if (from < BEGV) from = BEGV;
2471
2472 from_byte = CHAR_TO_BYTE (from);
2473
8489eb67
RS
2474 immediate_quit = 1;
2475 QUIT;
2476
195d1361 2477 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2478 while (count > 0)
2479 {
8489eb67
RS
2480 while (from < stop)
2481 {
f902a008 2482 int comstart_first, prefix;
195d1361 2483 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2484 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2485 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2486 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2487 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2488 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2489 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2490 if (depth == min_depth)
2491 last_good = from;
6a140a74 2492 INC_BOTH (from, from_byte);
195d1361 2493 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2494 if (from < stop && comstart_first
327719ee
MB
2495 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2496 SYNTAX_COMSTART_SECOND (c))
8489eb67 2497 && parse_sexp_ignore_comments)
e5d4f4dc 2498 {
7d0393cf 2499 /* we have encountered a comment start sequence and we
195d1361 2500 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2501 the comment style this sequence begins so that later,
2502 only a comment end of the same style actually ends
2503 the comment section */
2504 code = Scomment;
b7dbcc19 2505 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2506 comstyle = SYNTAX_COMMENT_STYLE (c1);
2507 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2508 INC_BOTH (from, from_byte);
f902a008 2509 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2510 }
7d0393cf 2511
f902a008 2512 if (prefix)
8489eb67
RS
2513 continue;
2514
0220c518 2515 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2516 {
2517 case Sescape:
2518 case Scharquote:
8b8bf8e6
SM
2519 if (from == stop)
2520 goto lose;
6a140a74 2521 INC_BOTH (from, from_byte);
8489eb67
RS
2522 /* treat following character as a word constituent */
2523 case Sword:
2524 case Ssymbol:
2525 if (depth || !sexpflag) break;
195d1361 2526 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2527 while (from < stop)
2528 {
195d1361 2529 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2530
2531 /* Some compilers can't handle this inside the switch. */
b7dbcc19 2532 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2533 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2534 switch (temp)
8489eb67
RS
2535 {
2536 case Scharquote:
2537 case Sescape:
6a140a74 2538 INC_BOTH (from, from_byte);
8b8bf8e6
SM
2539 if (from == stop)
2540 goto lose;
8489eb67
RS
2541 break;
2542 case Sword:
2543 case Ssymbol:
2544 case Squote:
2545 break;
2546 default:
2547 goto done;
2548 }
6a140a74 2549 INC_BOTH (from, from_byte);
8489eb67
RS
2550 }
2551 goto done;
2552
195d1361 2553 case Scomment_fence:
95ff8dfc
RS
2554 comstyle = ST_COMMENT_STYLE;
2555 /* FALLTHROUGH */
2556 case Scomment:
8489eb67 2557 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2558 UPDATE_SYNTAX_TABLE_FORWARD (from);
2559 found = forw_comment (from, from_byte, stop,
2560 comnested, comstyle, 0,
2561 &out_charpos, &out_bytepos, &dummy);
2562 from = out_charpos, from_byte = out_bytepos;
2563 if (!found)
8489eb67 2564 {
95ff8dfc
RS
2565 if (depth == 0)
2566 goto done;
2567 goto lose;
8489eb67 2568 }
95ff8dfc
RS
2569 INC_BOTH (from, from_byte);
2570 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2571 break;
2572
2573 case Smath:
2574 if (!sexpflag)
2575 break;
b7dbcc19 2576 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
6a140a74
RS
2577 {
2578 INC_BOTH (from, from_byte);
2579 }
8489eb67
RS
2580 if (mathexit)
2581 {
2582 mathexit = 0;
2583 goto close1;
2584 }
2585 mathexit = 1;
2586
2587 case Sopen:
2588 if (!++depth) goto done;
2589 break;
2590
2591 case Sclose:
2592 close1:
2593 if (!--depth) goto done;
2594 if (depth < min_depth)
9471945b
KS
2595 xsignal3 (Qscan_error,
2596 build_string ("Containing expression ends prematurely"),
2597 make_number (last_good), make_number (from));
8489eb67
RS
2598 break;
2599
2600 case Sstring:
195d1361 2601 case Sstring_fence:
ef316cf0 2602 temp_pos = dec_bytepos (from_byte);
b7dbcc19 2603 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
8489eb67
RS
2604 while (1)
2605 {
8b8bf8e6
SM
2606 if (from >= stop)
2607 goto lose;
195d1361 2608 UPDATE_SYNTAX_TABLE_FORWARD (from);
b7dbcc19 2609 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
42ebfa31
SM
2610 if (code == Sstring
2611 ? (c == stringterm
2612 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2613 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2614 break;
7c7ca3d2
RS
2615
2616 /* Some compilers can't handle this inside the switch. */
bd25db08 2617 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2618 switch (temp)
8489eb67
RS
2619 {
2620 case Scharquote:
2621 case Sescape:
6a140a74 2622 INC_BOTH (from, from_byte);
8489eb67 2623 }
6a140a74 2624 INC_BOTH (from, from_byte);
8489eb67 2625 }
6a140a74 2626 INC_BOTH (from, from_byte);
8489eb67
RS
2627 if (!depth && sexpflag) goto done;
2628 break;
240c43e8
SM
2629 default:
2630 /* Ignore whitespace, punctuation, quote, endcomment. */
2631 break;
8489eb67
RS
2632 }
2633 }
2634
2635 /* Reached end of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2636 if (depth)
2637 goto lose;
8489eb67
RS
2638
2639 immediate_quit = 0;
2640 return Qnil;
2641
2642 /* End of object reached */
2643 done:
2644 count--;
2645 }
2646
2647 while (count < 0)
2648 {
8489eb67
RS
2649 while (from > stop)
2650 {
6a140a74 2651 DEC_BOTH (from, from_byte);
195d1361 2652 UPDATE_SYNTAX_TABLE_BACKWARD (from);
b7dbcc19 2653 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
bd25db08 2654 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2655 if (depth == min_depth)
2656 last_good = from;
7fc8191e 2657 comstyle = 0;
95ff8dfc 2658 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2659 if (code == Sendcomment)
2660 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2661 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2662 && prev_char_comend_first (from, from_byte)
8489eb67 2663 && parse_sexp_ignore_comments)
e5d4f4dc 2664 {
f902a008 2665 /* We must record the comment style encountered so that
e5d4f4dc 2666 later, we can match only the proper comment begin
f902a008 2667 sequence of the same style. */
6a140a74 2668 DEC_BOTH (from, from_byte);
f902a008
RS
2669 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2670 code = Sendcomment;
b7dbcc19 2671 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
95ff8dfc
RS
2672 comstyle = SYNTAX_COMMENT_STYLE (c1);
2673 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2674 }
7d0393cf 2675
9828a477 2676 /* Quoting turns anything except a comment-ender
e6365855 2677 into a word character. Note that this cannot be true
f902a008 2678 if we decremented FROM in the if-statement above. */
9828a477 2679 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2680 {
2681 DEC_BOTH (from, from_byte);
2682 code = Sword;
2683 }
9828a477 2684 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2685 continue;
2686
9828a477 2687 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2688 {
2689 case Sword:
2690 case Ssymbol:
9828a477
KH
2691 case Sescape:
2692 case Scharquote:
8489eb67 2693 if (depth || !sexpflag) break;
195d1361
RS
2694 /* This word counts as a sexp; count object finished
2695 after passing it. */
8489eb67
RS
2696 while (from > stop)
2697 {
6a140a74 2698 temp_pos = from_byte;
ef316cf0
RS
2699 if (! NILP (current_buffer->enable_multibyte_characters))
2700 DEC_POS (temp_pos);
2701 else
2702 temp_pos--;
6a140a74 2703 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2704 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2705 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2706 /* Don't allow comment-end to be quoted. */
2707 if (temp_code == Sendcomment)
2708 goto done2;
6a140a74 2709 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2710 if (quoted)
93da5fff 2711 {
6a140a74 2712 DEC_BOTH (from, from_byte);
ef316cf0 2713 temp_pos = dec_bytepos (temp_pos);
6a140a74 2714 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2715 }
b7dbcc19 2716 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
bd25db08 2717 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2718 if (! (quoted || temp_code == Sword
2719 || temp_code == Ssymbol
2720 || temp_code == Squote))
8489eb67 2721 goto done2;
6a140a74 2722 DEC_BOTH (from, from_byte);
8489eb67
RS
2723 }
2724 goto done2;
2725
2726 case Smath:
2727 if (!sexpflag)
2728 break;
ef316cf0 2729 temp_pos = dec_bytepos (from_byte);
6a140a74 2730 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
b7dbcc19 2731 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
6a140a74 2732 DEC_BOTH (from, from_byte);
8489eb67
RS
2733 if (mathexit)
2734 {
2735 mathexit = 0;
2736 goto open2;
2737 }
2738 mathexit = 1;
2739
2740 case Sclose:
2741 if (!++depth) goto done2;
2742 break;
2743
2744 case Sopen:
2745 open2:
2746 if (!--depth) goto done2;
2747 if (depth < min_depth)
9471945b
KS
2748 xsignal3 (Qscan_error,
2749 build_string ("Containing expression ends prematurely"),
2750 make_number (last_good), make_number (from));
8489eb67
RS
2751 break;
2752
2753 case Sendcomment:
2754 if (!parse_sexp_ignore_comments)
2755 break;
95ff8dfc 2756 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2757 &out_charpos, &out_bytepos);
1aa963c8
SM
2758 /* FIXME: if found == -1, then it really wasn't a comment-end.
2759 For single-char Sendcomment, we can't do much about it apart
2760 from skipping the char.
2761 For 2-char endcomments, we could try again, taking both
2762 chars as separate entities, but it's a lot of trouble
2763 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2764 if (found != -1)
2765 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2766 break;
2767
195d1361
RS
2768 case Scomment_fence:
2769 case Sstring_fence:
2770 while (1)
2771 {
8b8bf8e6
SM
2772 if (from == stop)
2773 goto lose;
6b61353c 2774 DEC_BOTH (from, from_byte);
195d1361 2775 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2776 if (!char_quoted (from, from_byte)
b7dbcc19 2777 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
bd25db08 2778 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2779 break;
2780 }
2781 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2782 break;
7d0393cf 2783
8489eb67 2784 case Sstring:
b7dbcc19 2785 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
8489eb67
RS
2786 while (1)
2787 {
8b8bf8e6
SM
2788 if (from == stop)
2789 goto lose;
6b61353c
KH
2790 DEC_BOTH (from, from_byte);
2791 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2792 if (!char_quoted (from, from_byte)
2793 && (stringterm
2794 == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
42ebfa31 2795 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2796 break;
8489eb67 2797 }
8489eb67
RS
2798 if (!depth && sexpflag) goto done2;
2799 break;
240c43e8
SM
2800 default:
2801 /* Ignore whitespace, punctuation, quote, endcomment. */
2802 break;
8489eb67
RS
2803 }
2804 }
2805
2806 /* Reached start of buffer. Error if within object, return nil if between */
8b8bf8e6
SM
2807 if (depth)
2808 goto lose;
8489eb67
RS
2809
2810 immediate_quit = 0;
2811 return Qnil;
2812
2813 done2:
2814 count++;
2815 }
2816
2817
2818 immediate_quit = 0;
1e142fb7 2819 XSETFASTINT (val, from);
8489eb67
RS
2820 return val;
2821
2822 lose:
9471945b
KS
2823 xsignal3 (Qscan_error,
2824 build_string ("Unbalanced parentheses"),
2825 make_number (last_good), make_number (from));
8489eb67
RS
2826}
2827
8489eb67 2828DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2829 doc: /* Scan from character number FROM by COUNT lists.
2830Returns the character number of the position thus found.
2831
2832If DEPTH is nonzero, paren depth begins counting from that value,
2833only places where the depth in parentheses becomes zero
2834are candidates for stopping; COUNT such places are counted.
2835Thus, a positive value for DEPTH means go out levels.
2836
2837Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2838
2839If the beginning or end of (the accessible part of) the buffer is reached
2840and the depth is wrong, an error is signaled.
2841If the depth is right but the count is not used up, nil is returned. */)
2842 (from, count, depth)
8489eb67
RS
2843 Lisp_Object from, count, depth;
2844{
b7826503
PJ
2845 CHECK_NUMBER (from);
2846 CHECK_NUMBER (count);
2847 CHECK_NUMBER (depth);
8489eb67
RS
2848
2849 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2850}
2851
2852DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2853 doc: /* Scan from character number FROM by COUNT balanced expressions.
2854If COUNT is negative, scan backwards.
2855Returns the character number of the position thus found.
2856
2857Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2858
2859If the beginning or end of (the accessible part of) the buffer is reached
2860in the middle of a parenthetical grouping, an error is signaled.
2861If the beginning or end is reached between groupings
2862but before count is used up, nil is returned. */)
2863 (from, count)
8489eb67
RS
2864 Lisp_Object from, count;
2865{
b7826503
PJ
2866 CHECK_NUMBER (from);
2867 CHECK_NUMBER (count);
8489eb67
RS
2868
2869 return scan_lists (XINT (from), XINT (count), 0, 1);
2870}
2871
2872DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2873 0, 0, 0,
2874 doc: /* Move point backward over any number of chars with prefix syntax.
2875This includes chars with "quote" or "prefix" syntax (' or p). */)
2876 ()
8489eb67
RS
2877{
2878 int beg = BEGV;
6a140a74
RS
2879 int opoint = PT;
2880 int opoint_byte = PT_BYTE;
6ec8bbd2 2881 int pos = PT;
6a140a74 2882 int pos_byte = PT_BYTE;
93da5fff 2883 int c;
93da5fff 2884
7d0393cf 2885 if (pos <= beg)
195d1361 2886 {
f902a008
RS
2887 SET_PT_BOTH (opoint, opoint_byte);
2888
2889 return Qnil;
195d1361 2890 }
8489eb67 2891
f902a008
RS
2892 SETUP_SYNTAX_TABLE (pos, -1);
2893
6a140a74
RS
2894 DEC_BOTH (pos, pos_byte);
2895
1fd3172d 2896 while (!char_quoted (pos, pos_byte)
195d1361 2897 /* Previous statement updates syntax table. */
b7dbcc19 2898 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2899 || SYNTAX_PREFIX (c)))
2900 {
1fd3172d
RS
2901 opoint = pos;
2902 opoint_byte = pos_byte;
2903
2904 if (pos + 1 > beg)
2905 DEC_BOTH (pos, pos_byte);
93da5fff 2906 }
8489eb67 2907
6a140a74 2908 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2909
2910 return Qnil;
2911}
2912\f
6a140a74 2913/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2914 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2915 and return a description of the state of the parse at END.
c81a3712 2916 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2917 If COMMENTSTOP is 1, stop at the start of a comment.
2918 If COMMENTSTOP is -1, stop at the start or end of a comment,
2919 after the beginning of a string, or after the end of a string. */
8489eb67 2920
340f92b5 2921static void
6a140a74 2922scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2923 stopbefore, oldstate, commentstop)
e5d4f4dc 2924 struct lisp_parse_state *stateptr;
8489eb67 2925 register int from;
38355d47 2926 int from_byte;
8489eb67
RS
2927 int end, targetdepth, stopbefore;
2928 Lisp_Object oldstate;
c81a3712 2929 int commentstop;
8489eb67
RS
2930{
2931 struct lisp_parse_state state;
2932
2933 register enum syntaxcode code;
95ff8dfc
RS
2934 int c1;
2935 int comnested;
8489eb67
RS
2936 struct level { int last, prev; };
2937 struct level levelstart[100];
2938 register struct level *curlevel = levelstart;
2939 struct level *endlevel = levelstart + 100;
8489eb67
RS
2940 register int depth; /* Paren depth of current scanning location.
2941 level - levelstart equals this except
2942 when the depth becomes negative. */
2943 int mindepth; /* Lowest DEPTH value seen. */
2944 int start_quoted = 0; /* Nonzero means starting after a char quote */
2945 Lisp_Object tem;
93da5fff 2946 int prev_from; /* Keep one character before FROM. */
6a140a74 2947 int prev_from_byte;
1fd3172d 2948 int prev_from_syntax;
195d1361
RS
2949 int boundary_stop = commentstop == -1;
2950 int nofence;
95ff8dfc 2951 int found;
60c86a83 2952 EMACS_INT out_bytepos, out_charpos;
7c7ca3d2 2953 int temp;
93da5fff
KH
2954
2955 prev_from = from;
6a140a74
RS
2956 prev_from_byte = from_byte;
2957 if (from != BEGV)
2958 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2959
2960 /* Use this macro instead of `from++'. */
6a140a74
RS
2961#define INC_FROM \
2962do { prev_from = from; \
2963 prev_from_byte = from_byte; \
327719ee 2964 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
ab229fdd 2965 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2966 INC_BOTH (from, from_byte); \
3ceb4629
SM
2967 if (from < end) \
2968 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2969 } while (0)
8489eb67
RS
2970
2971 immediate_quit = 1;
2972 QUIT;
2973
265a9e55 2974 if (NILP (oldstate))
8489eb67
RS
2975 {
2976 depth = 0;
2977 state.instring = -1;
2978 state.incomment = 0;
195d1361
RS
2979 state.comstyle = 0; /* comment style a by default. */
2980 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2981 }
2982 else
2983 {
2984 tem = Fcar (oldstate);
265a9e55 2985 if (!NILP (tem))
8489eb67
RS
2986 depth = XINT (tem);
2987 else
2988 depth = 0;
2989
2990 oldstate = Fcdr (oldstate);
2991 oldstate = Fcdr (oldstate);
2992 oldstate = Fcdr (oldstate);
2993 tem = Fcar (oldstate);
195d1361 2994 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2995 state.instring = (!NILP (tem)
2996 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2997 : -1);
8489eb67
RS
2998
2999 oldstate = Fcdr (oldstate);
3000 tem = Fcar (oldstate);
e76f1c44
GM
3001 state.incomment = (!NILP (tem)
3002 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 3003 : 0);
8489eb67
RS
3004
3005 oldstate = Fcdr (oldstate);
3006 tem = Fcar (oldstate);
265a9e55 3007 start_quoted = !NILP (tem);
e5d4f4dc 3008
95ff8dfc 3009 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
3010 style a. If it is non-nil, we are in comment style b */
3011 oldstate = Fcdr (oldstate);
e5d4f4dc 3012 oldstate = Fcdr (oldstate);
195d1361 3013 tem = Fcar (oldstate);
7d0393cf 3014 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 3015 ? ST_COMMENT_STYLE : 1);
195d1361 3016
e5d4f4dc 3017 oldstate = Fcdr (oldstate);
e5d4f4dc 3018 tem = Fcar (oldstate);
195d1361 3019 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
3020 oldstate = Fcdr (oldstate);
3021 tem = Fcar (oldstate);
3022 while (!NILP (tem)) /* >= second enclosing sexps. */
3023 {
3024 /* curlevel++->last ran into compiler bug on Apollo */
3025 curlevel->last = XINT (Fcar (tem));
3026 if (++curlevel == endlevel)
02010917 3027 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
3028 curlevel->prev = -1;
3029 curlevel->last = -1;
3030 tem = Fcdr (tem);
3031 }
8489eb67
RS
3032 }
3033 state.quoted = 0;
3034 mindepth = depth;
3035
3036 curlevel->prev = -1;
3037 curlevel->last = -1;
3038
326b283b 3039 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
3040 temp = FETCH_CHAR (prev_from_byte);
3041 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 3042 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 3043
195d1361 3044 /* Enter the loop at a place appropriate for initial state. */
8489eb67 3045
326b283b
RS
3046 if (state.incomment)
3047 goto startincomment;
8489eb67
RS
3048 if (state.instring >= 0)
3049 {
195d1361 3050 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
3051 if (start_quoted)
3052 goto startquotedinstring;
8489eb67
RS
3053 goto startinstring;
3054 }
326b283b
RS
3055 else if (start_quoted)
3056 goto startquoted;
1fd3172d 3057
8489eb67
RS
3058 while (from < end)
3059 {
93da5fff 3060 INC_FROM;
1fd3172d 3061 code = prev_from_syntax & 0xff;
4c920633 3062
c5d4b96f
SM
3063 if (from < end
3064 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3065 && (c1 = FETCH_CHAR (from_byte),
3066 SYNTAX_COMSTART_SECOND (c1)))
3067 /* Duplicate code to avoid a complex if-expression
3068 which causes trouble for the SGI compiler. */
95ff8dfc 3069 {
c5d4b96f
SM
3070 /* Record the comment style we have entered so that only
3071 the comment-end sequence of the same style actually
3072 terminates the comment section. */
3073 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
3074 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3075 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
3076 state.incomment = comnested ? 1 : -1;
95ff8dfc 3077 state.comstr_start = prev_from;
c5d4b96f
SM
3078 INC_FROM;
3079 code = Scomment;
95ff8dfc 3080 }
4c920633 3081 else if (code == Scomment_fence)
e5d4f4dc
RS
3082 {
3083 /* Record the comment style we have entered so that only
3084 the comment-end sequence of the same style actually
3085 terminates the comment section. */
95ff8dfc
RS
3086 state.comstyle = ST_COMMENT_STYLE;
3087 state.incomment = -1;
195d1361 3088 state.comstr_start = prev_from;
e5d4f4dc 3089 code = Scomment;
e5d4f4dc 3090 }
c5d4b96f
SM
3091 else if (code == Scomment)
3092 {
3093 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
3094 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3095 1 : -1);
3096 state.comstr_start = prev_from;
3097 }
e5d4f4dc 3098
1fd3172d 3099 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 3100 continue;
0220c518 3101 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
3102 {
3103 case Sescape:
3104 case Scharquote:
3105 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3106 curlevel->last = prev_from;
8489eb67
RS
3107 startquoted:
3108 if (from == end) goto endquoted;
93da5fff 3109 INC_FROM;
8489eb67
RS
3110 goto symstarted;
3111 /* treat following character as a word constituent */
3112 case Sword:
3113 case Ssymbol:
3114 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3115 curlevel->last = prev_from;
8489eb67
RS
3116 symstarted:
3117 while (from < end)
3118 {
7c7ca3d2 3119 /* Some compilers can't handle this inside the switch. */
327719ee 3120 temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
ab229fdd 3121 temp = SYNTAX (temp);
7c7ca3d2 3122 switch (temp)
8489eb67
RS
3123 {
3124 case Scharquote:
3125 case Sescape:
93da5fff 3126 INC_FROM;
8489eb67
RS
3127 if (from == end) goto endquoted;
3128 break;
3129 case Sword:
3130 case Ssymbol:
3131 case Squote:
3132 break;
3133 default:
3134 goto symdone;
3135 }
93da5fff 3136 INC_FROM;
8489eb67
RS
3137 }
3138 symdone:
3139 curlevel->prev = curlevel->last;
3140 break;
3141
240c43e8 3142 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 3143 case Scomment:
195d1361 3144 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
3145 startincomment:
3146 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 3147 that we find a 2-char comment ender even if we start in the
e6365855 3148 middle of it. We don't want to do that if we're just at the
d355bd8a 3149 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
3150 found = forw_comment (from, from_byte, end,
3151 state.incomment, state.comstyle,
e6365855
SM
3152 (from == BEGV || from < state.comstr_start + 3)
3153 ? 0 : prev_from_syntax,
95ff8dfc
RS
3154 &out_charpos, &out_bytepos, &state.incomment);
3155 from = out_charpos; from_byte = out_bytepos;
3156 /* Beware! prev_from and friends are invalid now.
3157 Luckily, the `done' doesn't use them and the INC_FROM
3158 sets them to a sane value without looking at them. */
3159 if (!found) goto done;
d355bd8a 3160 INC_FROM;
8489eb67 3161 state.incomment = 0;
e5d4f4dc 3162 state.comstyle = 0; /* reset the comment style */
195d1361 3163 if (boundary_stop) goto done;
8489eb67
RS
3164 break;
3165
3166 case Sopen:
3167 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3168 depth++;
3169 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 3170 curlevel->last = prev_from;
8489eb67 3171 if (++curlevel == endlevel)
02010917 3172 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
3173 curlevel->prev = -1;
3174 curlevel->last = -1;
30844415 3175 if (targetdepth == depth) goto done;
8489eb67
RS
3176 break;
3177
3178 case Sclose:
3179 depth--;
3180 if (depth < mindepth)
3181 mindepth = depth;
3182 if (curlevel != levelstart)
3183 curlevel--;
3184 curlevel->prev = curlevel->last;
30844415 3185 if (targetdepth == depth) goto done;
8489eb67
RS
3186 break;
3187
3188 case Sstring:
195d1361
RS
3189 case Sstring_fence:
3190 state.comstr_start = from - 1;
8489eb67 3191 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 3192 curlevel->last = prev_from;
7d0393cf 3193 state.instring = (code == Sstring
b7dbcc19 3194 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
195d1361
RS
3195 : ST_STRING_STYLE);
3196 if (boundary_stop) goto done;
8489eb67 3197 startinstring:
195d1361 3198 {
644ea4df 3199 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 3200
644ea4df
RS
3201 while (1)
3202 {
3203 int c;
195d1361 3204
644ea4df 3205 if (from >= end) goto done;
b7dbcc19 3206 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
7c7ca3d2
RS
3207 /* Some compilers can't handle this inside the switch. */
3208 temp = SYNTAX (c);
ccf89641
KH
3209
3210 /* Check TEMP here so that if the char has
3211 a syntax-table property which says it is NOT
3212 a string character, it does not end the string. */
3213 if (nofence && c == state.instring && temp == Sstring)
3214 break;
3215
7c7ca3d2 3216 switch (temp)
644ea4df
RS
3217 {
3218 case Sstring_fence:
3219 if (!nofence) goto string_end;
3220 break;
3221 case Scharquote:
3222 case Sescape:
3223 INC_FROM;
3224 startquotedinstring:
3225 if (from >= end) goto endquoted;
195d1361 3226 }
644ea4df
RS
3227 INC_FROM;
3228 }
195d1361
RS
3229 }
3230 string_end:
8489eb67
RS
3231 state.instring = -1;
3232 curlevel->prev = curlevel->last;
93da5fff 3233 INC_FROM;
195d1361 3234 if (boundary_stop) goto done;
8489eb67
RS
3235 break;
3236
3237 case Smath:
240c43e8
SM
3238 /* FIXME: We should do something with it. */
3239 break;
3240 default:
3241 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
3242 break;
3243 }
3244 }
3245 goto done;
3246
3247 stop: /* Here if stopping before start of sexp. */
93da5fff 3248 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3249 goto done; /* but return the position before it. */
3250
3251 endquoted:
3252 state.quoted = 1;
3253 done:
3254 state.depth = depth;
3255 state.mindepth = mindepth;
3256 state.thislevelstart = curlevel->prev;
3257 state.prevlevelstart
3258 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3259 state.location = from;
1a102a58
RS
3260 state.levelstarts = Qnil;
3261 while (--curlevel >= levelstart)
3262 state.levelstarts = Fcons (make_number (curlevel->last),
3263 state.levelstarts);
8489eb67
RS
3264 immediate_quit = 0;
3265
e5d4f4dc 3266 *stateptr = state;
8489eb67
RS
3267}
3268
c81a3712 3269DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3270 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3271Parsing stops at TO or when certain criteria are met;
3272 point is set to where parsing stops.
3273If fifth arg OLDSTATE is omitted or nil,
3274 parsing assumes that FROM is the beginning of a function.
0e6228bc 3275Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3276 0. depth in parens.
3277 1. character address of start of innermost containing list; nil if none.
3278 2. character address of start of last complete sexp terminated.
3279 3. non-nil if inside a string.
3280 (it is the character that will terminate the string,
3281 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3282 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3283 else an integer (the current comment nesting).
3284 5. t if following a quote character.
3285 6. the minimum paren-depth encountered during this scan.
3286 7. t if in a comment of style b; symbol `syntax-table' if the comment
3287 should be terminated by a generic comment delimiter.
3288 8. character address of start of comment or string; nil if not in one.
3289 9. Intermediate data for continuation of parsing (subject to change).
3290If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3291in parentheses becomes equal to TARGETDEPTH.
3292Fourth arg STOPBEFORE non-nil means stop when come to
3293 any character that starts a sexp.
0e6228bc 3294Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3295 It is used to initialize the state of the parse. Elements number 1, 2, 6
43726c05 3296 and 8 are ignored.
fdb82f93
PJ
3297Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3298 If it is symbol `syntax-table', stop after the start of a comment or a
3299 string, or after end of a comment or a string. */)
3300 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 3301 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
3302{
3303 struct lisp_parse_state state;
3304 int target;
3305
265a9e55 3306 if (!NILP (targetdepth))
8489eb67 3307 {
b7826503 3308 CHECK_NUMBER (targetdepth);
8489eb67
RS
3309 target = XINT (targetdepth);
3310 }
3311 else
3312 target = -100000; /* We won't reach this depth */
3313
3314 validate_region (&from, &to);
6a140a74
RS
3315 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3316 XINT (to),
c81a3712 3317 target, !NILP (stopbefore), oldstate,
7d0393cf 3318 (NILP (commentstop)
195d1361 3319 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3320
3321 SET_PT (state.location);
7d0393cf 3322
8489eb67
RS
3323 return Fcons (make_number (state.depth),
3324 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3325 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3326 Fcons (state.instring >= 0
3327 ? (state.instring == ST_STRING_STYLE
195d1361 3328 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3329 Fcons (state.incomment < 0 ? Qt :
3330 (state.incomment == 0 ? Qnil :
3331 make_number (state.incomment)),
8489eb67 3332 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3333 Fcons (make_number (state.mindepth),
7d0393cf 3334 Fcons ((state.comstyle
bff37d2a
RS
3335 ? (state.comstyle == ST_COMMENT_STYLE
3336 ? Qsyntax_table : Qt) :
3337 Qnil),
0beaf54f
DL
3338 Fcons (((state.incomment
3339 || (state.instring >= 0))
bff37d2a
RS
3340 ? make_number (state.comstr_start)
3341 : Qnil),
1a102a58 3342 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3343}
3344\f
dfcf069d 3345void
8489eb67
RS
3346init_syntax_once ()
3347{
78f9a1f7 3348 register int i, c;
8ea151b2 3349 Lisp_Object temp;
8489eb67 3350
5ebaddf5
RS
3351 /* This has to be done here, before we call Fmake_char_table. */
3352 Qsyntax_table = intern ("syntax-table");
3353 staticpro (&Qsyntax_table);
3354
3355 /* Intern this now in case it isn't already done.
3356 Setting this variable twice is harmless.
3357 But don't staticpro it here--that is done in alloc.c. */
3358 Qchar_table_extra_slots = intern ("char-table-extra-slots");
3359
93da5fff 3360 /* Create objects which can be shared among syntax tables. */
42ebfa31 3361 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3362 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3363 XVECTOR (Vsyntax_code_object)->contents[i]
3364 = Fcons (make_number (i), Qnil);
3365
5ebaddf5
RS
3366 /* Now we are ready to set up this property, so we can
3367 create syntax tables. */
3368 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3369
93da5fff 3370 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3371
5ebaddf5 3372 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3373
aa7b08b4
RS
3374 /* Control characters should not be whitespace. */
3375 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3376 for (i = 0; i <= ' ' - 1; i++)
3377 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3378 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3379
3380 /* Except that a few really are whitespace. */
3381 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3382 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3383 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3384 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3385 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3386 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3387
93da5fff 3388 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3389 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3390 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3391 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3392 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3393 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3394 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3395
3396 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3397 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3398
3399 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3400 Fcons (make_number (Sopen), make_number (')')));
3401 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3402 Fcons (make_number (Sclose), make_number ('(')));
3403 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3404 Fcons (make_number (Sopen), make_number (']')));
3405 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3406 Fcons (make_number (Sclose), make_number ('[')));
3407 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3408 Fcons (make_number (Sopen), make_number ('}')));
3409 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3410 Fcons (make_number (Sclose), make_number ('{')));
3411 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3412 Fcons (make_number ((int) Sstring), Qnil));
3413 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3414 Fcons (make_number ((int) Sescape), Qnil));
3415
93da5fff 3416 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3417 for (i = 0; i < 10; i++)
78f9a1f7
KH
3418 {
3419 c = "_-+*/&|<>="[i];
3420 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3421 }
8489eb67 3422
93da5fff 3423 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3424 for (i = 0; i < 12; i++)
78f9a1f7
KH
3425 {
3426 c = ".,;:?!#@~^'`"[i];
3427 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3428 }
bd25db08
KH
3429
3430 /* All multibyte characters have syntax `word' by default. */
3431 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
5c7b02ab 3432 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
8489eb67
RS
3433}
3434
dfcf069d 3435void
8489eb67
RS
3436syms_of_syntax ()
3437{
3438 Qsyntax_table_p = intern ("syntax-table-p");
3439 staticpro (&Qsyntax_table_p);
3440
93da5fff
KH
3441 staticpro (&Vsyntax_code_object);
3442
a2994c46
KS
3443 staticpro (&gl_state.object);
3444 staticpro (&gl_state.global_code);
3445 staticpro (&gl_state.current_syntax_table);
3446 staticpro (&gl_state.old_prop);
3447
3448 /* Defined in regex.c */
3449 staticpro (&re_match_object);
3450
7bf5e9e4
RS
3451 Qscan_error = intern ("scan-error");
3452 staticpro (&Qscan_error);
3453 Fput (Qscan_error, Qerror_conditions,
f3be100f 3454 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3455 Fput (Qscan_error, Qerror_message,
3456 build_string ("Scan error"));
3457
8489eb67 3458 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3459 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3460
195d1361 3461 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3462 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3463Otherwise, that text property is simply ignored.
3464See the info node `(elisp)Syntax Properties' for a description of the
3465`syntax-table' property. */);
195d1361 3466
8489eb67
RS
3467 words_include_escapes = 0;
3468 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3469 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3470
bd25db08 3471 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3472 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3473 multibyte_syntax_as_symbol = 0;
3474
f4ed767f
GM
3475 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3476 &open_paren_in_column_0_is_defun_start,
b222e415 3477 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3478 open_paren_in_column_0_is_defun_start = 1;
3479
8f924df7
KH
3480
3481 DEFVAR_LISP ("find-word-boundary-function-table",
3482 &Vfind_word_boundary_function_table,
869bb237 3483 doc: /*
8f924df7 3484Char table of functions to search for the word boundary.
869bb237
KH
3485Each function is called with two arguments; POS and LIMIT.
3486POS and LIMIT are character positions in the current buffer.
3487
3488If POS is less than LIMIT, POS is at the first character of a word,
3489and the return value of a function is a position after the last
3490character of that word.
3491
3492If POS is not less than LIMIT, POS is at the last character of a word,
3493and the return value of a function is a position at the first
3494character of that word.
3495
3496In both cases, LIMIT bounds the search. */);
8f924df7 3497 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
869bb237 3498
8489eb67
RS
3499 defsubr (&Ssyntax_table_p);
3500 defsubr (&Ssyntax_table);
3501 defsubr (&Sstandard_syntax_table);
3502 defsubr (&Scopy_syntax_table);
3503 defsubr (&Sset_syntax_table);
3504 defsubr (&Schar_syntax);
beefa22e 3505 defsubr (&Smatching_paren);
c65adb44 3506 defsubr (&Sstring_to_syntax);
8489eb67 3507 defsubr (&Smodify_syntax_entry);
62abe9cb 3508 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3509
3510 defsubr (&Sforward_word);
3511
195d1361
RS
3512 defsubr (&Sskip_chars_forward);
3513 defsubr (&Sskip_chars_backward);
3514 defsubr (&Sskip_syntax_forward);
3515 defsubr (&Sskip_syntax_backward);
3516
b3cfe0c8 3517 defsubr (&Sforward_comment);
8489eb67
RS
3518 defsubr (&Sscan_lists);
3519 defsubr (&Sscan_sexps);
3520 defsubr (&Sbackward_prefix_chars);
3521 defsubr (&Sparse_partial_sexp);
3522}
839966f3
KH
3523
3524/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3525 (do not change this comment) */