(START_FILES, LIB_STANDARD): Adjust value according to HAVE_X86_64_LIB64_DIR.
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
429ab54e 2 Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
8cabe764 3 2002, 2003, 2004, 2005, 2006, 2007, 2008
429ab54e 4 Free Software Foundation, Inc.
8489eb67
RS
5
6This file is part of GNU Emacs.
7
8GNU Emacs is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
684d6f5b 10the Free Software Foundation; either version 3, or (at your option)
8489eb67
RS
11any later version.
12
13GNU Emacs is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with GNU Emacs; see the file COPYING. If not, write to
4fc5845f
LK
20the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21Boston, MA 02110-1301, USA. */
8489eb67
RS
22
23
18160b98 24#include <config.h>
8489eb67
RS
25#include <ctype.h>
26#include "lisp.h"
27#include "commands.h"
28#include "buffer.h"
93da5fff 29#include "charset.h"
e35f6ff7 30#include "keymap.h"
a1bc88d4 31#include "regex.h"
195d1361
RS
32
33/* Make syntax table lookup grant data in gl_state. */
34#define SYNTAX_ENTRY_VIA_PROPERTY
35
8489eb67 36#include "syntax.h"
195d1361
RS
37#include "intervals.h"
38
39/* We use these constants in place for comment-style and
40 string-ender-char to distinguish comments/strings started by
41 comment_fence and string_fence codes. */
42
43#define ST_COMMENT_STYLE (256 + 1)
44#define ST_STRING_STYLE (256 + 2)
93da5fff 45#include "category.h"
8489eb67 46
7bf5e9e4 47Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
48
49int words_include_escapes;
195d1361 50int parse_sexp_lookup_properties;
8489eb67 51
bd25db08
KH
52/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
53int multibyte_syntax_as_symbol;
54
8ea151b2
RS
55/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
56 if not compiled with GCC. No need to mark it, since it is used
57 only very temporarily. */
58Lisp_Object syntax_temp;
59
f4ed767f
GM
60/* Non-zero means an open parenthesis in column 0 is always considered
61 to be the start of a defun. Zero means an open parenthesis in
62 column 0 has no special meaning. */
63
64int open_paren_in_column_0_is_defun_start;
65
e5d4f4dc
RS
66/* This is the internal form of the parse state used in parse-partial-sexp. */
67
68struct lisp_parse_state
69 {
195d1361
RS
70 int depth; /* Depth at end of parsing. */
71 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 72 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 73 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
74 int quoted; /* Nonzero if just after an escape char at end of parsing */
75 int thislevelstart; /* Char number of most recent start-of-expression at current level */
76 int prevlevelstart; /* Char number of start of containing expression */
195d1361 77 int location; /* Char number at which parsing stopped. */
e5d4f4dc 78 int mindepth; /* Minimum depth seen while scanning. */
195d1361 79 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
80 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
81 of levels (starting from outermost). */
e5d4f4dc
RS
82 };
83\f
37bef230
RS
84/* These variables are a cache for finding the start of a defun.
85 find_start_pos is the place for which the defun start was found.
86 find_start_value is the defun start position found for it.
6a140a74 87 find_start_value_byte is the corresponding byte position.
37bef230
RS
88 find_start_buffer is the buffer it was found in.
89 find_start_begv is the BEGV value when it was found.
90 find_start_modiff is the value of MODIFF when it was found. */
91
92static int find_start_pos;
93static int find_start_value;
6a140a74 94static int find_start_value_byte;
37bef230
RS
95static struct buffer *find_start_buffer;
96static int find_start_begv;
97static int find_start_modiff;
6a140a74
RS
98
99
100static int find_defun_start P_ ((int, int));
932e6895
SM
101static int back_comment P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int, int,
102 EMACS_INT *, EMACS_INT *));
6a140a74 103static int char_quoted P_ ((int, int));
a1bc88d4 104static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int));
932e6895 105static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
6a140a74
RS
106static void scan_sexps_forward P_ ((struct lisp_parse_state *,
107 int, int, int, int,
108 int, Lisp_Object, int));
a1bc88d4 109static int in_classes P_ ((int, Lisp_Object));
195d1361
RS
110\f
111
112struct gl_state_s gl_state; /* Global state of syntax parser. */
113
114INTERVAL interval_of ();
115#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
116 to scan to property-change. */
117
6a140a74
RS
118/* Update gl_state to an appropriate interval which contains CHARPOS. The
119 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 120 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 121 valid at start, the rest is filled basing on OBJECT.
195d1361 122
6a140a74 123 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
124 direction than the intervals - or in an interval. We update the
125 current syntax-table basing on the property of this interval, and
6a140a74 126 update the interval to start further than CHARPOS - or be
195d1361 127 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 128 charpos to call this subroutine again - or be before/after the
195d1361
RS
129 start/end of OBJECT. */
130
131void
6a140a74
RS
132update_syntax_table (charpos, count, init, object)
133 int charpos, count, init;
195d1361
RS
134 Lisp_Object object;
135{
136 Lisp_Object tmp_table;
4ffe723b 137 int cnt = 0, invalidate = 1;
5a774522 138 INTERVAL i;
195d1361
RS
139
140 if (init)
141 {
bb0de084 142 gl_state.old_prop = Qnil;
195d1361
RS
143 gl_state.start = gl_state.b_property;
144 gl_state.stop = gl_state.e_property;
bb0de084
SM
145 i = interval_of (charpos, object);
146 gl_state.backward_i = gl_state.forward_i = i;
195d1361
RS
147 invalidate = 0;
148 if (NULL_INTERVAL_P (i))
149 return;
f902a008 150 /* interval_of updates only ->position of the return value, so
d80f4cc7 151 update the parents manually to speed up update_interval. */
bb0de084 152 while (!NULL_PARENT (i))
d80f4cc7
RS
153 {
154 if (AM_RIGHT_CHILD (i))
439d5cb4 155 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 156 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
157 - TOTAL_LENGTH (INTERVAL_PARENT (i))
158 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 159 else
439d5cb4 160 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 161 + TOTAL_LENGTH (i);
439d5cb4 162 i = INTERVAL_PARENT (i);
d80f4cc7
RS
163 }
164 i = gl_state.forward_i;
bb0de084 165 gl_state.b_property = i->position - gl_state.offset;
ee0cdb48 166 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
167 goto update;
168 }
5a774522 169 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
195d1361 170
423e705d 171 /* We are guaranteed to be called with CHARPOS either in i,
6a140a74 172 or further off. */
195d1361
RS
173 if (NULL_INTERVAL_P (i))
174 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 175 else if (charpos < i->position) /* Move left. */
195d1361
RS
176 {
177 if (count > 0)
f902a008 178 error ("Error in syntax_table logic for intervals <-");
195d1361 179 /* Update the interval. */
6a140a74 180 i = update_interval (i, charpos);
bb0de084 181 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
195d1361
RS
182 {
183 invalidate = 0;
195d1361 184 gl_state.forward_i = i;
ee0cdb48 185 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361 186 }
423e705d 187 }
6a140a74 188 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
189 {
190 if (count < 0)
f902a008 191 error ("Error in syntax_table logic for intervals ->");
195d1361 192 /* Update the interval. */
6a140a74 193 i = update_interval (i, charpos);
bb0de084 194 if (i->position != gl_state.e_property)
195d1361
RS
195 {
196 invalidate = 0;
195d1361 197 gl_state.backward_i = i;
bb0de084 198 gl_state.b_property = i->position - gl_state.offset;
195d1361
RS
199 }
200 }
195d1361
RS
201
202 update:
203 tmp_table = textget (i->plist, Qsyntax_table);
204
205 if (invalidate)
206 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
7d0393cf 207
423e705d
SM
208 if (invalidate) /* Did not get to adjacent interval. */
209 { /* with the same table => */
210 /* invalidate the old range. */
195d1361
RS
211 if (count > 0)
212 {
213 gl_state.backward_i = i;
bb0de084
SM
214 gl_state.b_property = i->position - gl_state.offset;
215 }
216 else
195d1361 217 {
bb0de084 218 gl_state.forward_i = i;
ee0cdb48 219 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
220 }
221 }
37bef230 222
bb0de084 223 if (!EQ (tmp_table, gl_state.old_prop))
195d1361 224 {
bb0de084
SM
225 gl_state.current_syntax_table = tmp_table;
226 gl_state.old_prop = tmp_table;
227 if (EQ (Fsyntax_table_p (tmp_table), Qt))
228 {
229 gl_state.use_global = 0;
7d0393cf 230 }
bb0de084
SM
231 else if (CONSP (tmp_table))
232 {
233 gl_state.use_global = 1;
234 gl_state.global_code = tmp_table;
235 }
7d0393cf 236 else
bb0de084
SM
237 {
238 gl_state.use_global = 0;
239 gl_state.current_syntax_table = current_buffer->syntax_table;
240 }
195d1361
RS
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
bb0de084
SM
248 {
249 gl_state.e_property = i->position - gl_state.offset;
250 gl_state.forward_i = i;
251 }
252 else
253 {
5a774522
SM
254 gl_state.b_property
255 = i->position + LENGTH (i) - gl_state.offset;
bb0de084
SM
256 gl_state.backward_i = i;
257 }
258 return;
195d1361 259 }
7d0393cf 260 else if (cnt == INTERVALS_AT_ONCE)
195d1361
RS
261 {
262 if (count > 0)
bb0de084 263 {
5a774522
SM
264 gl_state.e_property
265 = i->position + LENGTH (i) - gl_state.offset
266 /* e_property at EOB is not set to ZV but to ZV+1, so that
267 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
268 having to check eob between the two. */
269 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
bb0de084
SM
270 gl_state.forward_i = i;
271 }
272 else
273 {
274 gl_state.b_property = i->position - gl_state.offset;
275 gl_state.backward_i = i;
276 }
277 return;
195d1361
RS
278 }
279 cnt++;
280 i = count > 0 ? next_interval (i) : previous_interval (i);
281 }
bb0de084
SM
282 eassert (NULL_INTERVAL_P (i)); /* This property goes to the end. */
283 if (count > 0)
284 gl_state.e_property = gl_state.stop;
285 else
286 gl_state.b_property = gl_state.start;
195d1361
RS
287}
288\f
6a140a74
RS
289/* Returns TRUE if char at CHARPOS is quoted.
290 Global syntax-table data should be set up already to be good at CHARPOS
291 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
292
293static int
6a140a74
RS
294char_quoted (charpos, bytepos)
295 register int charpos, bytepos;
195d1361
RS
296{
297 register enum syntaxcode code;
298 register int beg = BEGV;
299 register int quoted = 0;
6a140a74
RS
300 int orig = charpos;
301
302 DEC_BOTH (charpos, bytepos);
195d1361 303
709a47ce 304 while (charpos >= beg)
195d1361 305 {
ab229fdd
AS
306 int c;
307
6a140a74 308 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
ab229fdd
AS
309 c = FETCH_CHAR (bytepos);
310 code = SYNTAX (c);
6a140a74
RS
311 if (! (code == Scharquote || code == Sescape))
312 break;
313
314 DEC_BOTH (charpos, bytepos);
315 quoted = !quoted;
195d1361 316 }
6a140a74
RS
317
318 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
319 return quoted;
320}
6a140a74
RS
321
322/* Return the bytepos one character after BYTEPOS.
323 We assume that BYTEPOS is not at the end of the buffer. */
324
325INLINE int
326inc_bytepos (bytepos)
327 int bytepos;
328{
ef316cf0
RS
329 if (NILP (current_buffer->enable_multibyte_characters))
330 return bytepos + 1;
331
6a140a74
RS
332 INC_POS (bytepos);
333 return bytepos;
334}
335
336/* Return the bytepos one character before BYTEPOS.
337 We assume that BYTEPOS is not at the start of the buffer. */
338
339INLINE int
340dec_bytepos (bytepos)
341 int bytepos;
342{
ef316cf0
RS
343 if (NILP (current_buffer->enable_multibyte_characters))
344 return bytepos - 1;
345
6a140a74
RS
346 DEC_POS (bytepos);
347 return bytepos;
348}
195d1361 349\f
7cc80f0a
RS
350/* Return a defun-start position before before POS and not too far before.
351 It should be the last one before POS, or nearly the last.
352
353 When open_paren_in_column_0_is_defun_start is nonzero,
1fd1cc2f 354 only the beginning of the buffer is treated as a defun-start.
7cc80f0a
RS
355
356 We record the information about where the scan started
357 and what its result was, so that another call in the same area
358 can return the same value very quickly.
195d1361
RS
359
360 There is no promise at which position the global syntax data is
361 valid on return from the subroutine, so the caller should explicitly
362 update the global data. */
37bef230
RS
363
364static int
6a140a74
RS
365find_defun_start (pos, pos_byte)
366 int pos, pos_byte;
37bef230 367{
6a140a74 368 int opoint = PT, opoint_byte = PT_BYTE;
37bef230 369
1fd1cc2f
RS
370 if (!open_paren_in_column_0_is_defun_start)
371 {
372 find_start_value_byte = BEGV_BYTE;
373 return BEGV;
374 }
375
37bef230
RS
376 /* Use previous finding, if it's valid and applies to this inquiry. */
377 if (current_buffer == find_start_buffer
378 /* Reuse the defun-start even if POS is a little farther on.
379 POS might be in the next defun, but that's ok.
380 Our value may not be the best possible, but will still be usable. */
381 && pos <= find_start_pos + 1000
382 && pos >= find_start_value
383 && BEGV == find_start_begv
384 && MODIFF == find_start_modiff)
385 return find_start_value;
386
387 /* Back up to start of line. */
6a140a74 388 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 389
195d1361
RS
390 /* We optimize syntax-table lookup for rare updates. Thus we accept
391 only those `^\s(' which are good in global _and_ text-property
392 syntax-tables. */
393 gl_state.current_syntax_table = current_buffer->syntax_table;
394 gl_state.use_global = 0;
1fd1cc2f 395 while (PT > BEGV)
37bef230 396 {
ab229fdd
AS
397 int c;
398
1fd1cc2f
RS
399 /* Open-paren at start of line means we may have found our
400 defun-start. */
ab229fdd
AS
401 c = FETCH_CHAR (PT_BYTE);
402 if (SYNTAX (c) == Sopen)
195d1361 403 {
1fd1cc2f 404 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
ab229fdd
AS
405 c = FETCH_CHAR (PT_BYTE);
406 if (SYNTAX (c) == Sopen)
1fd1cc2f
RS
407 break;
408 /* Now fallback to the default value. */
409 gl_state.current_syntax_table = current_buffer->syntax_table;
410 gl_state.use_global = 0;
195d1361 411 }
1fd1cc2f
RS
412 /* Move to beg of previous line. */
413 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
37bef230
RS
414 }
415
416 /* Record what we found, for the next try. */
6a140a74
RS
417 find_start_value = PT;
418 find_start_value_byte = PT_BYTE;
37bef230
RS
419 find_start_buffer = current_buffer;
420 find_start_modiff = MODIFF;
421 find_start_begv = BEGV;
422 find_start_pos = pos;
423
6a140a74
RS
424 TEMP_SET_PT_BOTH (opoint, opoint_byte);
425
37bef230
RS
426 return find_start_value;
427}
428\f
f902a008
RS
429/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
430
431static int
432prev_char_comend_first (pos, pos_byte)
433 int pos, pos_byte;
434{
435 int c, val;
436
437 DEC_BOTH (pos, pos_byte);
438 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
439 c = FETCH_CHAR (pos_byte);
440 val = SYNTAX_COMEND_FIRST (c);
441 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
442 return val;
443}
444
445/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
446
3f679f55
SM
447/* static int
448 * prev_char_comstart_first (pos, pos_byte)
449 * int pos, pos_byte;
450 * {
451 * int c, val;
7d0393cf 452 *
3f679f55
SM
453 * DEC_BOTH (pos, pos_byte);
454 * UPDATE_SYNTAX_TABLE_BACKWARD (pos);
455 * c = FETCH_CHAR (pos_byte);
456 * val = SYNTAX_COMSTART_FIRST (c);
457 * UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
458 * return val;
459 * } */
f902a008 460
6a140a74
RS
461/* Checks whether charpos FROM is at the end of a comment.
462 FROM_BYTE is the bytepos corresponding to FROM.
463 Do not move back before STOP.
464
465 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
466 return -1 otherwise.
467
468 If successful, store the charpos of the comment's beginning
469 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
470
471 Global syntax data remains valid for backward search starting at
472 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
473
474static int
95ff8dfc 475back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
932e6895 476 EMACS_INT from, from_byte, stop;
95ff8dfc 477 int comnested, comstyle;
932e6895 478 EMACS_INT *charpos_ptr, *bytepos_ptr;
195d1361
RS
479{
480 /* Look back, counting the parity of string-quotes,
481 and recording the comment-starters seen.
482 When we reach a safe place, assume that's not in a string;
483 then step the main scan to the earliest comment-starter seen
484 an even number of string quotes away from the safe place.
485
486 OFROM[I] is position of the earliest comment-starter seen
487 which is I+2X quotes from the comment-end.
488 PARITY is current parity of quotes from the comment end. */
3ee5041c 489 int string_style = -1; /* Presumed outside of any string. */
195d1361 490 int string_lossage = 0;
f7c436c1 491 /* Not a real lossage: indicates that we have passed a matching comment
7d0393cf 492 starter plus a non-matching comment-ender, meaning that any matching
f7c436c1
SM
493 comment-starter we might see later could be a false positive (hidden
494 inside another comment).
495 Test case: { a (* b } c (* d *) */
496 int comment_lossage = 0;
195d1361 497 int comment_end = from;
6a140a74 498 int comment_end_byte = from_byte;
195d1361 499 int comstart_pos = 0;
6a140a74 500 int comstart_byte;
eb35b628
RS
501 /* Place where the containing defun starts,
502 or 0 if we didn't come across it yet. */
503 int defun_start = 0;
504 int defun_start_byte = 0;
195d1361 505 register enum syntaxcode code;
95ff8dfc 506 int nesting = 1; /* current comment nesting */
ea315ed6 507 int c;
3f679f55
SM
508 int syntax = 0;
509
510 /* FIXME: A }} comment-ender style leads to incorrect behavior
511 in the case of {{ c }}} because we ignore the last two chars which are
512 assumed to be comment-enders although they aren't. */
195d1361
RS
513
514 /* At beginning of range to scan, we're outside of strings;
515 that determines quote parity to the comment-end. */
516 while (from != stop)
517 {
3f679f55
SM
518 int temp_byte, prev_syntax;
519 int com2start, com2end;
6a140a74 520
195d1361 521 /* Move back and examine a character. */
6a140a74 522 DEC_BOTH (from, from_byte);
195d1361
RS
523 UPDATE_SYNTAX_TABLE_BACKWARD (from);
524
3f679f55 525 prev_syntax = syntax;
6a140a74 526 c = FETCH_CHAR (from_byte);
3f679f55 527 syntax = SYNTAX_WITH_FLAGS (c);
195d1361
RS
528 code = SYNTAX (c);
529
3f679f55
SM
530 /* Check for 2-char comment markers. */
531 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
532 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
533 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
534 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
535 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
536 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
537 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
538
539 /* Nasty cases with overlapping 2-char comment markers:
540 - snmp-mode: -- c -- foo -- c --
541 --- c --
542 ------ c --
543 - c-mode: *||*
544 |* *|* *|
545 |*| |* |*|
546 /// */
547
548 /* If a 2-char comment sequence partly overlaps with another,
549 we don't try to be clever. */
550 if (from > stop && (com2end || com2start))
195d1361 551 {
3f679f55
SM
552 int next = from, next_byte = from_byte, next_c, next_syntax;
553 DEC_BOTH (next, next_byte);
554 UPDATE_SYNTAX_TABLE_BACKWARD (next);
555 next_c = FETCH_CHAR (next_byte);
556 next_syntax = SYNTAX_WITH_FLAGS (next_c);
557 if (((com2start || comnested)
558 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
559 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
560 || ((com2end || comnested)
561 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
562 && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
563 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
564 goto lossage;
565 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
f902a008 566 }
3f679f55
SM
567
568 if (com2start && comstart_pos == 0)
569 /* We're looking at a comment starter. But it might be a comment
570 ender as well (see snmp-mode). The first time we see one, we
571 need to consider it as a comment starter,
572 and the subsequent times as a comment ender. */
573 com2end = 0;
574
575 /* Turn a 2-char comment sequences into the appropriate syntax. */
576 if (com2end)
577 code = Sendcomment;
578 else if (com2start)
579 code = Scomment;
580 /* Ignore comment starters of a different style. */
581 else if (code == Scomment
582 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
583 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
1aa963c8 584 continue;
195d1361 585
9828a477
KH
586 /* Ignore escaped characters, except comment-enders. */
587 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
588 continue;
589
3ee5041c 590 switch (code)
195d1361 591 {
3ee5041c
SM
592 case Sstring_fence:
593 case Scomment_fence:
594 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
595 case Sstring:
596 /* Track parity of quotes. */
597 if (string_style == -1)
598 /* Entering a string. */
599 string_style = c;
600 else if (string_style == c)
601 /* Leaving the string. */
602 string_style = -1;
603 else
604 /* If we have two kinds of string delimiters.
605 There's no way to grok this scanning backwards. */
195d1361 606 string_lossage = 1;
3ee5041c 607 break;
7d0393cf 608
3ee5041c
SM
609 case Scomment:
610 /* We've already checked that it is the relevant comstyle. */
f7c436c1 611 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
612 /* There are odd string quotes involved, so let's be careful.
613 Test case in Pascal: " { " a { " } */
614 goto lossage;
615
f7c436c1
SM
616 if (!comnested)
617 {
618 /* Record best comment-starter so far. */
619 comstart_pos = from;
620 comstart_byte = from_byte;
621 }
622 else if (--nesting <= 0)
95ff8dfc
RS
623 /* nested comments have to be balanced, so we don't need to
624 keep looking for earlier ones. We use here the same (slightly
625 incorrect) reasoning as below: since it is followed by uniform
626 paired string quotes, this comment-start has to be outside of
627 strings, else the comment-end itself would be inside a string. */
628 goto done;
3ee5041c
SM
629 break;
630
02010917 631 case Sendcomment:
3f679f55 632 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
d070eb22 633 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
3f679f55 634 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
02010917
SM
635 /* This is the same style of comment ender as ours. */
636 {
637 if (comnested)
638 nesting++;
639 else
640 /* Anything before that can't count because it would match
641 this comment-ender rather than ours. */
642 from = stop; /* Break out of the loop. */
643 }
f7c436c1
SM
644 else if (comstart_pos != 0 || c != '\n')
645 /* We're mixing comment styles here, so we'd better be careful.
646 The (comstart_pos != 0 || c != '\n') check is not quite correct
647 (we should just always set comment_lossage), but removing it
648 would imply that any multiline comment in C would go through
649 lossage, which seems overkill.
650 The failure should only happen in the rare cases such as
651 { (* } *) */
652 comment_lossage = 1;
02010917 653 break;
195d1361 654
02010917
SM
655 case Sopen:
656 /* Assume a defun-start point is outside of strings. */
657 if (open_paren_in_column_0_is_defun_start
658 && (from == stop
659 || (temp_byte = dec_bytepos (from_byte),
660 FETCH_CHAR (temp_byte) == '\n')))
661 {
662 defun_start = from;
663 defun_start_byte = from_byte;
664 from = stop; /* Break out of the loop. */
665 }
eb35b628 666 break;
02010917
SM
667
668 default:
f7c436c1 669 break;
eb35b628 670 }
195d1361
RS
671 }
672
673 if (comstart_pos == 0)
674 {
675 from = comment_end;
6a140a74 676 from_byte = comment_end_byte;
195d1361
RS
677 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
678 }
f7c436c1
SM
679 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
680 or `done'), then we've found the beginning of the non-nested comment. */
681 else if (1) /* !comnested */
195d1361
RS
682 {
683 from = comstart_pos;
6a140a74 684 from_byte = comstart_byte;
789f3320 685 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
195d1361
RS
686 }
687 else
688 {
3ee5041c
SM
689 struct lisp_parse_state state;
690 lossage:
195d1361
RS
691 /* We had two kinds of string delimiters mixed up
692 together. Decode this going forwards.
02010917 693 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
694 to the one in question; this records where we
695 last passed a comment starter. */
eb35b628
RS
696 /* If we did not already find the defun start, find it now. */
697 if (defun_start == 0)
698 {
699 defun_start = find_defun_start (comment_end, comment_end_byte);
700 defun_start_byte = find_start_value_byte;
701 }
02010917 702 do
195d1361 703 {
02010917
SM
704 scan_sexps_forward (&state,
705 defun_start, defun_start_byte,
706 comment_end, -10000, 0, Qnil, 0);
707 defun_start = comment_end;
708 if (state.incomment == (comnested ? 1 : -1)
709 && state.comstyle == comstyle)
710 from = state.comstr_start;
711 else
712 {
713 from = comment_end;
714 if (state.incomment)
715 /* If comment_end is inside some other comment, maybe ours
716 is nested, so we need to try again from within the
717 surrounding comment. Example: { a (* " *) */
718 {
719 /* FIXME: We should advance by one or two chars. */
720 defun_start = state.comstr_start + 2;
721 defun_start_byte = CHAR_TO_BYTE (defun_start);
722 }
723 }
724 } while (defun_start < comment_end);
725
6a140a74 726 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
727 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
728 }
7d0393cf 729
95ff8dfc 730 done:
6a140a74
RS
731 *charpos_ptr = from;
732 *bytepos_ptr = from_byte;
733
1aa963c8 734 return (from == comment_end) ? -1 : from;
195d1361
RS
735}
736\f
8489eb67 737DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
fdb82f93
PJ
738 doc: /* Return t if OBJECT is a syntax table.
739Currently, any char-table counts as a syntax table. */)
740 (object)
2203e1e8 741 Lisp_Object object;
8489eb67 742{
2203e1e8 743 if (CHAR_TABLE_P (object)
e704cb4b 744 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
745 return Qt;
746 return Qnil;
747}
748
8ea151b2 749static void
8489eb67
RS
750check_syntax_table (obj)
751 Lisp_Object obj;
752{
47f5f6ae
KS
753 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
754 Qsyntax_table_p, obj);
7d0393cf 755}
8489eb67 756
8489eb67 757DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
fdb82f93
PJ
758 doc: /* Return the current syntax table.
759This is the one specified by the current buffer. */)
760 ()
8489eb67
RS
761{
762 return current_buffer->syntax_table;
763}
764
765DEFUN ("standard-syntax-table", Fstandard_syntax_table,
766 Sstandard_syntax_table, 0, 0, 0,
fdb82f93
PJ
767 doc: /* Return the standard syntax table.
768This is the one used for new buffers. */)
769 ()
8489eb67
RS
770{
771 return Vstandard_syntax_table;
772}
773
774DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
fdb82f93
PJ
775 doc: /* Construct a new syntax table and return it.
776It is a copy of the TABLE, which defaults to the standard syntax table. */)
777 (table)
8489eb67
RS
778 Lisp_Object table;
779{
8ea151b2
RS
780 Lisp_Object copy;
781
265a9e55 782 if (!NILP (table))
8ea151b2
RS
783 check_syntax_table (table);
784 else
785 table = Vstandard_syntax_table;
786
787 copy = Fcopy_sequence (table);
0f867324
RS
788
789 /* Only the standard syntax table should have a default element.
790 Other syntax tables should inherit from parents instead. */
791 XCHAR_TABLE (copy)->defalt = Qnil;
792
793 /* Copied syntax tables should all have parents.
794 If we copied one with no parent, such as the standard syntax table,
795 use the standard syntax table as the copy's parent. */
796 if (NILP (XCHAR_TABLE (copy)->parent))
797 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 798 return copy;
8489eb67
RS
799}
800
801DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
fdb82f93
PJ
802 doc: /* Select a new syntax table for the current buffer.
803One argument, a syntax table. */)
804 (table)
8489eb67
RS
805 Lisp_Object table;
806{
4e4dfa78 807 int idx;
8ea151b2 808 check_syntax_table (table);
8489eb67
RS
809 current_buffer->syntax_table = table;
810 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
811 idx = PER_BUFFER_VAR_IDX (syntax_table);
812 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
813 return table;
814}
815\f
816/* Convert a letter which signifies a syntax code
817 into the code it signifies.
195d1361 818 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
819
820unsigned char syntax_spec_code[0400] =
821 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
822 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
823 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
824 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 825 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
826 (char) Smath, 0377, 0377, (char) Squote,
827 (char) Sopen, (char) Sclose, 0377, 0377,
828 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
829 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
830 0377, 0377, 0377, 0377,
831 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 832 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
833 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
834 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
835 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
836 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
837 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
838 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 839 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
840 };
841
195d1361 842/* Indexed by syntax code, give the letter that describes it. */
8489eb67 843
195d1361 844char syntax_code_spec[16] =
8489eb67 845 {
195d1361
RS
846 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
847 '!', '|'
8489eb67 848 };
93da5fff
KH
849
850/* Indexed by syntax code, give the object (cons of syntax code and
851 nil) to be stored in syntax table. Since these objects can be
852 shared among syntax tables, we generate them in advance. By
853 sharing objects, the function `describe-syntax' can give a more
854 compact listing. */
855static Lisp_Object Vsyntax_code_object;
856
8489eb67 857\f
8ea151b2
RS
858/* Look up the value for CHARACTER in syntax table TABLE's parent
859 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
860 for CHARACTER. It's actually used only when not compiled with GCC. */
861
862Lisp_Object
863syntax_parent_lookup (table, character)
864 Lisp_Object table;
865 int character;
866{
867 Lisp_Object value;
868
869 while (1)
870 {
871 table = XCHAR_TABLE (table)->parent;
872 if (NILP (table))
873 return Qnil;
874
875 value = XCHAR_TABLE (table)->contents[character];
876 if (!NILP (value))
877 return value;
878 }
879}
880
8489eb67 881DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
fdb82f93
PJ
882 doc: /* Return the syntax code of CHARACTER, described by a character.
883For example, if CHARACTER is a word constituent,
884the character `w' is returned.
885The characters that correspond to various syntax codes
886are listed in the documentation of `modify-syntax-entry'. */)
887 (character)
2203e1e8 888 Lisp_Object character;
8489eb67 889{
8ea151b2 890 int char_int;
195d1361
RS
891 gl_state.current_syntax_table = current_buffer->syntax_table;
892
893 gl_state.use_global = 0;
b7826503 894 CHECK_NUMBER (character);
2203e1e8 895 char_int = XINT (character);
8ea151b2 896 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
897}
898
899DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
fdb82f93
PJ
900 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
901 (character)
2203e1e8 902 Lisp_Object character;
beefa22e 903{
8ea151b2 904 int char_int, code;
195d1361
RS
905 gl_state.current_syntax_table = current_buffer->syntax_table;
906 gl_state.use_global = 0;
b7826503 907 CHECK_NUMBER (character);
2203e1e8 908 char_int = XINT (character);
8ea151b2 909 code = SYNTAX (char_int);
a8bd7cd8 910 if (code == Sopen || code == Sclose)
2e34157c 911 return SYNTAX_MATCH (char_int);
beefa22e 912 return Qnil;
8489eb67
RS
913}
914
c65adb44 915DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
fdb82f93
PJ
916 doc: /* Convert a syntax specification STRING into syntax cell form.
917STRING should be a string as it is allowed as argument of
918`modify-syntax-entry'. Value is the equivalent cons cell
bce3dc36 919\(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
fdb82f93
PJ
920text property. */)
921 (string)
c65adb44 922 Lisp_Object string;
8489eb67 923{
2e567bd3 924 register const unsigned char *p;
8489eb67 925 register enum syntaxcode code;
247e20a8 926 int val;
8ea151b2 927 Lisp_Object match;
8489eb67 928
b7826503 929 CHECK_STRING (string);
8489eb67 930
d5db4077 931 p = SDATA (string);
8489eb67
RS
932 code = (enum syntaxcode) syntax_spec_code[*p++];
933 if (((int) code & 0377) == 0377)
b764a653 934 error ("Invalid syntax description letter: %c", p[-1]);
8489eb67 935
8ea151b2 936 if (code == Sinherit)
c65adb44 937 return Qnil;
8ea151b2
RS
938
939 if (*p)
d1be9f0f 940 {
93da5fff 941 int len;
7c19e8e1 942 int character = (STRING_CHAR_AND_LENGTH
d5db4077 943 (p, SBYTES (string) - 1, len));
93da5fff 944 XSETINT (match, character);
d1be9f0f
RS
945 if (XFASTINT (match) == ' ')
946 match = Qnil;
93da5fff 947 p += len;
d1be9f0f
RS
948 }
949 else
8ea151b2 950 match = Qnil;
8489eb67 951
8ea151b2 952 val = (int) code;
8489eb67
RS
953 while (*p)
954 switch (*p++)
955 {
956 case '1':
247e20a8 957 val |= 1 << 16;
8489eb67
RS
958 break;
959
960 case '2':
247e20a8 961 val |= 1 << 17;
8489eb67
RS
962 break;
963
964 case '3':
247e20a8 965 val |= 1 << 18;
8489eb67
RS
966 break;
967
968 case '4':
247e20a8 969 val |= 1 << 19;
8489eb67
RS
970 break;
971
972 case 'p':
247e20a8 973 val |= 1 << 20;
8489eb67 974 break;
e5d4f4dc
RS
975
976 case 'b':
247e20a8 977 val |= 1 << 21;
e5d4f4dc 978 break;
95ff8dfc
RS
979
980 case 'n':
981 val |= 1 << 22;
982 break;
8489eb67 983 }
7d0393cf 984
93da5fff 985 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
c65adb44 986 return XVECTOR (Vsyntax_code_object)->contents[val];
93da5fff
KH
987 else
988 /* Since we can't use a shared object, let's make a new one. */
c65adb44
SM
989 return Fcons (make_number (val), match);
990}
991
fdb82f93 992/* I really don't know why this is interactive
2c9e1900 993 help-form should at least be made useful whilst reading the second arg. */
7d0393cf 994DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
c65adb44 995 "cSet syntax for character: \nsSet syntax for %s to: ",
a50a10a0 996 doc: /* Set syntax for character CHAR according to string NEWENTRY.
d7ee9fab 997The syntax is changed only for table SYNTAX-TABLE, which defaults to
fdb82f93
PJ
998 the current buffer's syntax table.
999The first character of NEWENTRY should be one of the following:
1000 Space or - whitespace syntax. w word constituent.
1001 _ symbol constituent. . punctuation.
1002 ( open-parenthesis. ) close-parenthesis.
1003 " string quote. \\ escape.
1004 $ paired delimiter. ' expression quote or prefix operator.
1005 < comment starter. > comment ender.
1006 / character-quote. @ inherit from `standard-syntax-table'.
1007 | generic string fence. ! generic comment fence.
1008
1009Only single-character comment start and end sequences are represented thus.
1010Two-character sequences are represented as described below.
1011The second character of NEWENTRY is the matching parenthesis,
1012 used only if the first character is `(' or `)'.
1013Any additional characters are flags.
1014Defined flags are the characters 1, 2, 3, 4, b, p, and n.
a50a10a0
PJ
1015 1 means CHAR is the start of a two-char comment start sequence.
1016 2 means CHAR is the second character of such a sequence.
1017 3 means CHAR is the start of a two-char comment end sequence.
1018 4 means CHAR is the second character of such a sequence.
fdb82f93
PJ
1019
1020There can be up to two orthogonal comment sequences. This is to support
1021language modes such as C++. By default, all comment sequences are of style
1022a, but you can set the comment sequence style to b (on the second character
1023of a comment-start, or the first character of a comment-end sequence) using
1024this flag:
a50a10a0
PJ
1025 b means CHAR is part of comment sequence b.
1026 n means CHAR is part of a nestable comment sequence.
fdb82f93 1027
a50a10a0 1028 p means CHAR is a prefix character for `backward-prefix-chars';
fdb82f93 1029 such characters are treated as whitespace when they occur
a50a10a0
PJ
1030 between expressions.
1031usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
fdb82f93 1032 (c, newentry, syntax_table)
c65adb44
SM
1033 Lisp_Object c, newentry, syntax_table;
1034{
b7826503 1035 CHECK_NUMBER (c);
c65adb44
SM
1036
1037 if (NILP (syntax_table))
1038 syntax_table = current_buffer->syntax_table;
1039 else
1040 check_syntax_table (syntax_table);
8489eb67 1041
c65adb44 1042 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Fstring_to_syntax (newentry));
e5b94d44
CY
1043
1044 /* We clear the regexp cache, since character classes can now have
1045 different values from those in the compiled regexps.*/
1046 clear_regexp_cache ();
1047
8489eb67
RS
1048 return Qnil;
1049}
1050\f
1051/* Dump syntax table to buffer in human-readable format */
1052
62abe9cb
SM
1053DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1054 Sinternal_describe_syntax_value, 1, 1, 0,
1055 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1056 (syntax)
1057 Lisp_Object syntax;
8489eb67
RS
1058{
1059 register enum syntaxcode code;
e6365855 1060 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 1061 char str[2];
62abe9cb 1062 Lisp_Object first, match_lisp, value = syntax;
8489eb67 1063
8ea151b2
RS
1064 if (NILP (value))
1065 {
62abe9cb
SM
1066 insert_string ("default");
1067 return syntax;
8ea151b2
RS
1068 }
1069
908b7fea
KH
1070 if (CHAR_TABLE_P (value))
1071 {
62abe9cb
SM
1072 insert_string ("deeper char-table ...");
1073 return syntax;
908b7fea
KH
1074 }
1075
8ea151b2
RS
1076 if (!CONSP (value))
1077 {
62abe9cb
SM
1078 insert_string ("invalid");
1079 return syntax;
8ea151b2
RS
1080 }
1081
c1d497be
KR
1082 first = XCAR (value);
1083 match_lisp = XCDR (value);
8ea151b2
RS
1084
1085 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1086 {
62abe9cb
SM
1087 insert_string ("invalid");
1088 return syntax;
8489eb67
RS
1089 }
1090
e704cb4b 1091 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1092 start1 = (XINT (first) >> 16) & 1;
1093 start2 = (XINT (first) >> 17) & 1;
1094 end1 = (XINT (first) >> 18) & 1;
1095 end2 = (XINT (first) >> 19) & 1;
1096 prefix = (XINT (first) >> 20) & 1;
1097 comstyle = (XINT (first) >> 21) & 1;
e6365855 1098 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1099
1100 if ((int) code < 0 || (int) code >= (int) Smax)
1101 {
1102 insert_string ("invalid");
62abe9cb 1103 return syntax;
8489eb67
RS
1104 }
1105 desc = syntax_code_spec[(int) code];
1106
1107 str[0] = desc, str[1] = 0;
1108 insert (str, 1);
1109
93da5fff
KH
1110 if (NILP (match_lisp))
1111 insert (" ", 1);
1112 else
1113 insert_char (XINT (match_lisp));
8489eb67 1114
8489eb67
RS
1115 if (start1)
1116 insert ("1", 1);
1117 if (start2)
1118 insert ("2", 1);
1119
1120 if (end1)
1121 insert ("3", 1);
1122 if (end2)
1123 insert ("4", 1);
1124
1125 if (prefix)
1126 insert ("p", 1);
e5d4f4dc
RS
1127 if (comstyle)
1128 insert ("b", 1);
e6365855
SM
1129 if (comnested)
1130 insert ("n", 1);
8489eb67
RS
1131
1132 insert_string ("\twhich means: ");
1133
0220c518 1134 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1135 {
1136 case Swhitespace:
1137 insert_string ("whitespace"); break;
1138 case Spunct:
1139 insert_string ("punctuation"); break;
1140 case Sword:
1141 insert_string ("word"); break;
1142 case Ssymbol:
1143 insert_string ("symbol"); break;
1144 case Sopen:
1145 insert_string ("open"); break;
1146 case Sclose:
1147 insert_string ("close"); break;
1148 case Squote:
d671382e 1149 insert_string ("prefix"); break;
8489eb67
RS
1150 case Sstring:
1151 insert_string ("string"); break;
1152 case Smath:
1153 insert_string ("math"); break;
1154 case Sescape:
1155 insert_string ("escape"); break;
1156 case Scharquote:
1157 insert_string ("charquote"); break;
1158 case Scomment:
1159 insert_string ("comment"); break;
1160 case Sendcomment:
1161 insert_string ("endcomment"); break;
d671382e
SM
1162 case Sinherit:
1163 insert_string ("inherit"); break;
1164 case Scomment_fence:
1165 insert_string ("comment fence"); break;
1166 case Sstring_fence:
1167 insert_string ("string fence"); break;
8489eb67
RS
1168 default:
1169 insert_string ("invalid");
62abe9cb 1170 return syntax;
8489eb67
RS
1171 }
1172
8ea151b2 1173 if (!NILP (match_lisp))
8489eb67
RS
1174 {
1175 insert_string (", matches ");
8ea151b2 1176 insert_char (XINT (match_lisp));
8489eb67
RS
1177 }
1178
1179 if (start1)
1180 insert_string (",\n\t is the first character of a comment-start sequence");
1181 if (start2)
1182 insert_string (",\n\t is the second character of a comment-start sequence");
1183
1184 if (end1)
1185 insert_string (",\n\t is the first character of a comment-end sequence");
1186 if (end2)
1187 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1188 if (comstyle)
1189 insert_string (" (comment style b)");
e6365855
SM
1190 if (comnested)
1191 insert_string (" (nestable)");
e5d4f4dc 1192
8489eb67
RS
1193 if (prefix)
1194 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1195
62abe9cb
SM
1196 return syntax;
1197}
8489eb67 1198\f
195d1361
RS
1199int parse_sexp_ignore_comments;
1200
8489eb67
RS
1201/* Return the position across COUNT words from FROM.
1202 If that many words cannot be found before the end of the buffer, return 0.
1203 COUNT negative means scan backward and stop at word beginning. */
1204
dfcf069d 1205int
8489eb67
RS
1206scan_words (from, count)
1207 register int from, count;
1208{
1209 register int beg = BEGV;
1210 register int end = ZV;
6a140a74 1211 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1212 register enum syntaxcode code;
1213 int ch0, ch1;
8489eb67
RS
1214
1215 immediate_quit = 1;
1216 QUIT;
1217
195d1361
RS
1218 SETUP_SYNTAX_TABLE (from, count);
1219
8489eb67
RS
1220 while (count > 0)
1221 {
1222 while (1)
1223 {
1224 if (from == end)
1225 {
1226 immediate_quit = 0;
1227 return 0;
1228 }
195d1361 1229 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1230 ch0 = FETCH_CHAR (from_byte);
93da5fff 1231 code = SYNTAX (ch0);
6a140a74 1232 INC_BOTH (from, from_byte);
8489eb67
RS
1233 if (words_include_escapes
1234 && (code == Sescape || code == Scharquote))
1235 break;
1236 if (code == Sword)
1237 break;
8489eb67 1238 }
93da5fff
KH
1239 /* Now CH0 is a character which begins a word and FROM is the
1240 position of the next character. */
8489eb67
RS
1241 while (1)
1242 {
1243 if (from == end) break;
195d1361 1244 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1245 ch1 = FETCH_CHAR (from_byte);
93da5fff 1246 code = SYNTAX (ch1);
8489eb67
RS
1247 if (!(words_include_escapes
1248 && (code == Sescape || code == Scharquote)))
93da5fff 1249 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1250 break;
6a140a74 1251 INC_BOTH (from, from_byte);
93da5fff 1252 ch0 = ch1;
8489eb67
RS
1253 }
1254 count--;
1255 }
1256 while (count < 0)
1257 {
1258 while (1)
1259 {
1260 if (from == beg)
1261 {
1262 immediate_quit = 0;
1263 return 0;
1264 }
6a140a74 1265 DEC_BOTH (from, from_byte);
195d1361 1266 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1267 ch1 = FETCH_CHAR (from_byte);
93da5fff 1268 code = SYNTAX (ch1);
8489eb67
RS
1269 if (words_include_escapes
1270 && (code == Sescape || code == Scharquote))
1271 break;
1272 if (code == Sword)
1273 break;
8489eb67 1274 }
93da5fff
KH
1275 /* Now CH1 is a character which ends a word and FROM is the
1276 position of it. */
8489eb67
RS
1277 while (1)
1278 {
6a140a74
RS
1279 if (from == beg)
1280 break;
fc13f0a2 1281 DEC_BOTH (from, from_byte);
195d1361 1282 UPDATE_SYNTAX_TABLE_BACKWARD (from);
fc13f0a2 1283 ch0 = FETCH_CHAR (from_byte);
93da5fff 1284 code = SYNTAX (ch0);
8489eb67
RS
1285 if (!(words_include_escapes
1286 && (code == Sescape || code == Scharquote)))
93da5fff 1287 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
fc13f0a2
CY
1288 {
1289 INC_BOTH (from, from_byte);
1290 break;
1291 }
93da5fff 1292 ch1 = ch0;
8489eb67
RS
1293 }
1294 count++;
1295 }
1296
1297 immediate_quit = 0;
1298
1299 return from;
1300}
1301
29e16385 1302DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "p",
fdb82f93
PJ
1303 doc: /* Move point forward ARG words (backward if ARG is negative).
1304Normally returns t.
1305If an edge of the buffer or a field boundary is reached, point is left there
1306and the function returns nil. Field boundaries are not noticed if
1307`inhibit-field-text-motion' is non-nil. */)
29e16385
RS
1308 (arg)
1309 Lisp_Object arg;
8489eb67 1310{
c096ae4d 1311 Lisp_Object tmp;
2c6ea900 1312 int orig_val, val;
8489eb67 1313
29e16385
RS
1314 if (NILP (arg))
1315 XSETFASTINT (arg, 1);
1316 else
1317 CHECK_NUMBER (arg);
1318
1319 val = orig_val = scan_words (PT, XINT (arg));
2c6ea900 1320 if (! orig_val)
29e16385 1321 val = XINT (arg) > 0 ? ZV : BEGV;
5878ee6f 1322
b8855607 1323 /* Avoid jumping out of an input field. */
c096ae4d
SM
1324 tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1325 Qt, Qnil, Qnil);
1326 val = XFASTINT (tmp);
7d0393cf 1327
8489eb67 1328 SET_PT (val);
e6d8341f 1329 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1330}
1331\f
195d1361
RS
1332Lisp_Object skip_chars ();
1333
1334DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
fdb82f93
PJ
1335 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1336STRING is like the inside of a `[...]' in a regular expression
1337except that `]' is never special and `\\' quotes `^', `-' or `\\'
1338 (but not as the end of a range; quoting is never needed there).
1339Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1340With arg "^a-zA-Z", skips nonletters stopping before first letter.
a1bc88d4
RS
1341Char classes, e.g. `[:alpha:]', are supported.
1342
1343Returns the distance traveled, either zero or positive. */)
fdb82f93 1344 (string, lim)
195d1361
RS
1345 Lisp_Object string, lim;
1346{
a1bc88d4 1347 return skip_chars (1, 0, string, lim, 1);
195d1361
RS
1348}
1349
1350DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
fdb82f93
PJ
1351 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1352See `skip-chars-forward' for details.
1353Returns the distance traveled, either zero or negative. */)
1354 (string, lim)
195d1361
RS
1355 Lisp_Object string, lim;
1356{
a1bc88d4 1357 return skip_chars (0, 0, string, lim, 1);
195d1361
RS
1358}
1359
1360DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
fdb82f93
PJ
1361 doc: /* Move point forward across chars in specified syntax classes.
1362SYNTAX is a string of syntax code characters.
1363Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1364If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1365This function returns the distance traveled, either zero or positive. */)
1366 (syntax, lim)
195d1361
RS
1367 Lisp_Object syntax, lim;
1368{
a1bc88d4 1369 return skip_chars (1, 1, syntax, lim, 0);
195d1361
RS
1370}
1371
1372DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
fdb82f93
PJ
1373 doc: /* Move point backward across chars in specified syntax classes.
1374SYNTAX is a string of syntax code characters.
1375Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1376If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1377This function returns the distance traveled, either zero or negative. */)
1378 (syntax, lim)
195d1361
RS
1379 Lisp_Object syntax, lim;
1380{
a1bc88d4 1381 return skip_chars (0, 1, syntax, lim, 0);
195d1361
RS
1382}
1383
6a140a74 1384static Lisp_Object
a1bc88d4 1385skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes)
195d1361
RS
1386 int forwardp, syntaxp;
1387 Lisp_Object string, lim;
a1bc88d4 1388 int handle_iso_classes;
195d1361 1389{
195d1361
RS
1390 register unsigned int c;
1391 unsigned char fastmap[0400];
7e68b0ea 1392 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
9690d026
KH
1393 of which codes don't fit in FASTMAP. In that case, set the
1394 ranges of characters in CHAR_RANGES. */
5df4982e 1395 int *char_ranges;
7e68b0ea 1396 int n_char_ranges = 0;
195d1361 1397 int negate = 0;
4101e6fe 1398 register int i, i_byte;
7e68b0ea 1399 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1400 int string_multibyte;
1401 int size_byte;
2e567bd3 1402 const unsigned char *str;
82d497fc 1403 int len;
a1bc88d4 1404 Lisp_Object iso_classes;
195d1361 1405
b7826503 1406 CHECK_STRING (string);
d5db4077 1407 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2);
1674d9a2 1408 string_multibyte = STRING_MULTIBYTE (string);
d5db4077
KR
1409 str = SDATA (string);
1410 size_byte = SBYTES (string);
a1bc88d4 1411 iso_classes = Qnil;
195d1361 1412
82d497fc
KH
1413 /* Adjust the multibyteness of the string to that of the buffer. */
1414 if (multibyte != string_multibyte)
1415 {
1416 int nbytes;
1417
1418 if (multibyte)
d5db4077
KR
1419 nbytes = count_size_as_multibyte (SDATA (string),
1420 SCHARS (string));
82d497fc 1421 else
d5db4077 1422 nbytes = SCHARS (string);
82d497fc
KH
1423 if (nbytes != size_byte)
1424 {
2e567bd3
KR
1425 unsigned char *tmp = (unsigned char *) alloca (nbytes);
1426 copy_text (SDATA (string), tmp, size_byte,
82d497fc
KH
1427 string_multibyte, multibyte);
1428 size_byte = nbytes;
2e567bd3 1429 str = tmp;
82d497fc
KH
1430 }
1431 }
1432
195d1361
RS
1433 if (NILP (lim))
1434 XSETINT (lim, forwardp ? ZV : BEGV);
1435 else
b7826503 1436 CHECK_NUMBER_COERCE_MARKER (lim);
195d1361
RS
1437
1438 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1439 if (XINT (lim) > ZV)
1440 XSETFASTINT (lim, ZV);
1441 if (XINT (lim) < BEGV)
1442 XSETFASTINT (lim, BEGV);
1443
195d1361
RS
1444 bzero (fastmap, sizeof fastmap);
1445
82d497fc 1446 i_byte = 0;
4101e6fe 1447
13090112 1448 if (i_byte < size_byte
d5db4077 1449 && SREF (string, 0) == '^')
195d1361 1450 {
82d497fc 1451 negate = 1; i_byte++;
195d1361
RS
1452 }
1453
1454 /* Find the characters specified and set their elements of fastmap.
1455 If syntaxp, each character counts as itself.
1456 Otherwise, handle backslashes and ranges specially. */
1457
13090112 1458 while (i_byte < size_byte)
195d1361 1459 {
82d497fc
KH
1460 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1461 i_byte += len;
4101e6fe 1462
195d1361 1463 if (syntaxp)
4101e6fe 1464 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1465 else
1466 {
a1bc88d4
RS
1467 if (handle_iso_classes && c == '['
1468 && i_byte < size_byte
1469 && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':')
1470 {
1471 const unsigned char *class_beg = str + i_byte + 1;
1472 const unsigned char *class_end = class_beg;
b3bda4fd 1473 const unsigned char *class_limit = str + size_byte - 2;
db9cd97a 1474 /* Leave room for the null. */
a1bc88d4
RS
1475 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1476 re_wctype_t cc;
1477
1478 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1479 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1480
b3bda4fd
KS
1481 while (class_end < class_limit
1482 && *class_end >= 'a' && *class_end <= 'z')
a1bc88d4
RS
1483 class_end++;
1484
b3bda4fd
KS
1485 if (class_end == class_beg
1486 || *class_end != ':' || class_end[1] != ']')
1487 goto not_a_class_name;
a1bc88d4
RS
1488
1489 bcopy (class_beg, class_name, class_end - class_beg);
1490 class_name[class_end - class_beg] = 0;
1491
1492 cc = re_wctype (class_name);
1493 if (cc == 0)
1494 error ("Invalid ISO C character class");
1495
1496 iso_classes = Fcons (make_number (cc), iso_classes);
1497
1498 i_byte = class_end + 2 - str;
1499 continue;
1500 }
1501
b3bda4fd 1502 not_a_class_name:
195d1361
RS
1503 if (c == '\\')
1504 {
13090112 1505 if (i_byte == size_byte)
4101e6fe
RS
1506 break;
1507
57951c2e
AS
1508 c = STRING_CHAR_AND_LENGTH (str + i_byte,
1509 size_byte - i_byte, len);
82d497fc 1510 i_byte += len;
195d1361 1511 }
57951c2e
AS
1512 /* Treat `-' as range character only if another character
1513 follows. */
1514 if (i_byte + 1 < size_byte
82d497fc 1515 && str[i_byte] == '-')
195d1361 1516 {
9690d026 1517 unsigned int c2;
4101e6fe
RS
1518
1519 /* Skip over the dash. */
82d497fc 1520 i_byte++;
4101e6fe 1521
4101e6fe 1522 /* Get the end of the range. */
57951c2e
AS
1523 c2 = STRING_CHAR_AND_LENGTH (str + i_byte,
1524 size_byte - i_byte, len);
82d497fc 1525 i_byte += len;
7e68b0ea 1526
4101e6fe 1527 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1528 {
1529 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6 1530 {
7ae339a7
KH
1531 /* Handle a range starting with a character of
1532 less than 256, and ending with a character of
1533 not less than 256. Split that into two
1534 ranges, the low one ending at 0377, and the
1535 high one starting at the smallest character
1536 in the charset of C2 and ending at C2. */
e39091b6
KH
1537 int charset = CHAR_CHARSET (c2);
1538 int c1 = MAKE_CHAR (charset, 0, 0);
1539
e39091b6
KH
1540 char_ranges[n_char_ranges++] = c1;
1541 char_ranges[n_char_ranges++] = c2;
465cf84c 1542 c2 = 0377;
e39091b6 1543 }
06274af5
KH
1544 while (c <= c2)
1545 {
1546 fastmap[c] = 1;
1547 c++;
1548 }
1549 }
9690d026 1550 else if (c <= c2) /* Both C and C2 are multibyte char. */
195d1361 1551 {
9690d026
KH
1552 char_ranges[n_char_ranges++] = c;
1553 char_ranges[n_char_ranges++] = c2;
195d1361 1554 }
195d1361
RS
1555 }
1556 else
7e68b0ea 1557 {
e39091b6
KH
1558 if (SINGLE_BYTE_CHAR_P (c))
1559 fastmap[c] = 1;
1560 else
7e68b0ea 1561 {
4101e6fe
RS
1562 char_ranges[n_char_ranges++] = c;
1563 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1564 }
1565 }
195d1361
RS
1566 }
1567 }
1568
9690d026 1569 /* If ^ was the first character, complement the fastmap. */
195d1361
RS
1570 if (negate)
1571 for (i = 0; i < sizeof fastmap; i++)
9690d026 1572 fastmap[i] ^= 1;
195d1361
RS
1573
1574 {
1575 int start_point = PT;
1576 int pos = PT;
6a140a74 1577 int pos_byte = PT_BYTE;
9af7511a
KH
1578 unsigned char *p = PT_ADDR, *endp, *stop;
1579
1580 if (forwardp)
1581 {
1582 endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1583 stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
9af7511a
KH
1584 }
1585 else
1586 {
1587 endp = CHAR_POS_ADDR (XINT (lim));
d7ee9fab 1588 stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
9af7511a 1589 }
195d1361
RS
1590
1591 immediate_quit = 1;
1592 if (syntaxp)
1593 {
1594 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1595 if (forwardp)
1596 {
7e68b0ea 1597 if (multibyte)
9af7511a
KH
1598 while (1)
1599 {
1600 int nbytes;
1601
1602 if (p >= stop)
ee0cdb48 1603 {
9af7511a
KH
1604 if (p >= endp)
1605 break;
1606 p = GAP_END_ADDR;
1607 stop = endp;
ee0cdb48 1608 }
9af7511a
KH
1609 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
1610 if (! fastmap[(int) SYNTAX (c)])
1611 break;
1612 p += nbytes, pos++, pos_byte += nbytes;
1613 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1614 }
7e68b0ea 1615 else
9af7511a
KH
1616 while (1)
1617 {
1618 if (p >= stop)
1619 {
1620 if (p >= endp)
1621 break;
1622 p = GAP_END_ADDR;
1623 stop = endp;
1624 }
1625 if (! fastmap[(int) SYNTAX (*p)])
1626 break;
1627 p++, pos++;
1628 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1629 }
195d1361
RS
1630 }
1631 else
1632 {
7e68b0ea 1633 if (multibyte)
9af7511a
KH
1634 while (1)
1635 {
1636 unsigned char *prev_p;
1637 int nbytes;
1638
1639 if (p <= stop)
1640 {
1641 if (p <= endp)
7e68b0ea 1642 break;
9af7511a
KH
1643 p = GPT_ADDR;
1644 stop = endp;
1645 }
1646 prev_p = p;
1647 while (--p >= stop && ! CHAR_HEAD_P (*p));
1648 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1649 if (prev_p - p > nbytes)
1650 p = prev_p - 1, c = *p, nbytes = 1;
1651 else
1652 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
1653 pos--, pos_byte -= nbytes;
1654 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
1655 if (! fastmap[(int) SYNTAX (c)])
1656 {
1657 pos++;
1658 pos_byte += nbytes;
1659 break;
1660 }
1661 }
7e68b0ea 1662 else
9af7511a
KH
1663 while (1)
1664 {
1665 if (p <= stop)
ee0cdb48 1666 {
9af7511a 1667 if (p <= endp)
4c8535eb 1668 break;
9af7511a
KH
1669 p = GPT_ADDR;
1670 stop = endp;
ee0cdb48 1671 }
d46b87e9 1672 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
9af7511a
KH
1673 if (! fastmap[(int) SYNTAX (p[-1])])
1674 break;
1675 p--, pos--;
9af7511a 1676 }
195d1361
RS
1677 }
1678 }
1679 else
1680 {
1681 if (forwardp)
1682 {
7e68b0ea 1683 if (multibyte)
9af7511a 1684 while (1)
7e68b0ea 1685 {
9af7511a
KH
1686 int nbytes;
1687
1688 if (p >= stop)
1689 {
1690 if (p >= endp)
1691 break;
1692 p = GAP_END_ADDR;
1693 stop = endp;
1694 }
1695 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes);
a1bc88d4
RS
1696
1697 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1698 {
1699 if (negate)
1700 break;
1701 else
1702 goto fwd_ok;
1703 }
1704
9690d026 1705 if (SINGLE_BYTE_CHAR_P (c))
7e68b0ea 1706 {
9690d026
KH
1707 if (!fastmap[c])
1708 break;
1709 }
1710 else
1711 {
1712 /* If we are looking at a multibyte character,
1713 we must look up the character in the table
1714 CHAR_RANGES. If there's no data in the
1715 table, that character is not what we want to
1716 skip. */
1717
e39091b6
KH
1718 /* The following code do the right thing even if
1719 n_char_ranges is zero (i.e. no data in
1720 CHAR_RANGES). */
7e68b0ea 1721 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1722 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1723 break;
1724 if (!(negate ^ (i < n_char_ranges)))
1725 break;
7e68b0ea 1726 }
a1bc88d4 1727 fwd_ok:
9af7511a 1728 p += nbytes, pos++, pos_byte += nbytes;
7e68b0ea
RS
1729 }
1730 else
9af7511a
KH
1731 while (1)
1732 {
1733 if (p >= stop)
1734 {
1735 if (p >= endp)
1736 break;
1737 p = GAP_END_ADDR;
1738 stop = endp;
1739 }
a1bc88d4
RS
1740
1741 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1742 {
1743 if (negate)
1744 break;
1745 else
1ad6e4db 1746 goto fwd_unibyte_ok;
a1bc88d4
RS
1747 }
1748
4e837cd0 1749 if (!fastmap[*p])
9af7511a 1750 break;
1ad6e4db 1751 fwd_unibyte_ok:
9af7511a
KH
1752 p++, pos++;
1753 }
195d1361
RS
1754 }
1755 else
1756 {
7e68b0ea 1757 if (multibyte)
9af7511a 1758 while (1)
7e68b0ea 1759 {
9af7511a
KH
1760 unsigned char *prev_p;
1761 int nbytes;
e39091b6 1762
9af7511a
KH
1763 if (p <= stop)
1764 {
1765 if (p <= endp)
1766 break;
1767 p = GPT_ADDR;
1768 stop = endp;
1769 }
1770 prev_p = p;
1771 while (--p >= stop && ! CHAR_HEAD_P (*p));
1772 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes);
1773 if (prev_p - p > nbytes)
1774 p = prev_p - 1, c = *p, nbytes = 1;
1775 else
1776 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH);
a1bc88d4
RS
1777
1778 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1779 {
1780 if (negate)
1781 break;
1782 else
1783 goto back_ok;
1784 }
1785
9690d026
KH
1786 if (SINGLE_BYTE_CHAR_P (c))
1787 {
1788 if (!fastmap[c])
1789 break;
1790 }
1791 else
7e68b0ea 1792 {
9690d026 1793 /* See the comment in the previous similar code. */
e39091b6
KH
1794 for (i = 0; i < n_char_ranges; i += 2)
1795 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1796 break;
1797 if (!(negate ^ (i < n_char_ranges)))
1798 break;
7e68b0ea 1799 }
a1bc88d4 1800 back_ok:
9af7511a 1801 pos--, pos_byte -= nbytes;
7e68b0ea
RS
1802 }
1803 else
9af7511a
KH
1804 while (1)
1805 {
1806 if (p <= stop)
1807 {
1808 if (p <= endp)
1809 break;
1810 p = GPT_ADDR;
1811 stop = endp;
1812 }
a1bc88d4
RS
1813
1814 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1815 {
1816 if (negate)
1817 break;
1818 else
1ad6e4db 1819 goto back_unibyte_ok;
a1bc88d4
RS
1820 }
1821
4e837cd0 1822 if (!fastmap[p[-1]])
9af7511a 1823 break;
1ad6e4db 1824 back_unibyte_ok:
9af7511a
KH
1825 p--, pos--;
1826 }
195d1361
RS
1827 }
1828 }
7e68b0ea 1829
6a140a74
RS
1830#if 0 /* Not needed now that a position in mid-character
1831 cannot be specified in Lisp. */
7e68b0ea
RS
1832 if (multibyte
1833 /* INC_POS or DEC_POS might have moved POS over LIM. */
1834 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1835 pos = XINT (lim);
6a140a74 1836#endif
7e68b0ea 1837
6a140a74
RS
1838 if (! multibyte)
1839 pos_byte = pos;
1840
1841 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1842 immediate_quit = 0;
1843
1844 return make_number (PT - start_point);
1845 }
1846}
a1bc88d4
RS
1847
1848/* Return 1 if character C belongs to one of the ISO classes
1849 in the list ISO_CLASSES. Each class is represented by an
1850 integer which is its type according to re_wctype. */
1851
1852static int
1853in_classes (c, iso_classes)
1854 int c;
1855 Lisp_Object iso_classes;
1856{
1857 int fits_class = 0;
1858
1859 while (! NILP (iso_classes))
1860 {
1861 Lisp_Object elt;
1862 elt = XCAR (iso_classes);
1863 iso_classes = XCDR (iso_classes);
1864
1865 if (re_iswctype (c, XFASTINT (elt)))
1866 fits_class = 1;
1867 }
1868
1869 return fits_class;
1870}
195d1361 1871\f
95ff8dfc
RS
1872/* Jump over a comment, assuming we are at the beginning of one.
1873 FROM is the current position.
1874 FROM_BYTE is the bytepos corresponding to FROM.
1875 Do not move past STOP (a charpos).
1876 The comment over which we have to jump is of style STYLE
1877 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1878 NESTING should be positive to indicate the nesting at the beginning
1879 for nested comments and should be zero or negative else.
1880 ST_COMMENT_STYLE cannot be nested.
1881 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1882 (or 0 If the search cannot start in the middle of a two-character).
1883
1884 If successful, return 1 and store the charpos of the comment's end
1885 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1886 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1887 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1888 (as defined for state.incomment) in *INCOMMENT_PTR.
1889
1890 The comment end is the last character of the comment rather than the
1891 character just after the comment.
1892
1893 Global syntax data is assumed to initially be valid for FROM and
1894 remains valid for forward search starting at the returned position. */
1895
1896static int
1897forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1898 charpos_ptr, bytepos_ptr, incomment_ptr)
a0aa7fcf 1899 EMACS_INT from, from_byte, stop;
95ff8dfc 1900 int nesting, style, prev_syntax;
a0aa7fcf
SM
1901 EMACS_INT *charpos_ptr, *bytepos_ptr;
1902 int *incomment_ptr;
95ff8dfc
RS
1903{
1904 register int c, c1;
1905 register enum syntaxcode code;
1906 register int syntax;
1907
1908 if (nesting <= 0) nesting = -1;
1909
1910 /* Enter the loop in the middle so that we find
1911 a 2-char comment ender if we start in the middle of it. */
1912 syntax = prev_syntax;
1913 if (syntax != 0) goto forw_incomment;
1914
1915 while (1)
1916 {
1917 if (from == stop)
1918 {
1919 *incomment_ptr = nesting;
1920 *charpos_ptr = from;
1921 *bytepos_ptr = from_byte;
1922 return 0;
1923 }
1924 c = FETCH_CHAR (from_byte);
1925 syntax = SYNTAX_WITH_FLAGS (c);
1926 code = syntax & 0xff;
1927 if (code == Sendcomment
1928 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
abf8a9ff
SM
1929 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
1930 (nesting > 0 && --nesting == 0) : nesting < 0))
95ff8dfc
RS
1931 /* we have encountered a comment end of the same style
1932 as the comment sequence which began this comment
1933 section */
1934 break;
1935 if (code == Scomment_fence
1936 && style == ST_COMMENT_STYLE)
1937 /* we have encountered a comment end of the same style
1938 as the comment sequence which began this comment
1939 section. */
1940 break;
1941 if (nesting > 0
1942 && code == Scomment
abf8a9ff 1943 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
95ff8dfc
RS
1944 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1945 /* we have encountered a nested comment of the same style
1946 as the comment sequence which began this comment section */
1947 nesting++;
1948 INC_BOTH (from, from_byte);
1949 UPDATE_SYNTAX_TABLE_FORWARD (from);
7d0393cf 1950
95ff8dfc
RS
1951 forw_incomment:
1952 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1953 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1954 && (c1 = FETCH_CHAR (from_byte),
abf8a9ff
SM
1955 SYNTAX_COMEND_SECOND (c1))
1956 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1957 SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
4ffe723b
GM
1958 {
1959 if (--nesting <= 0)
1960 /* we have encountered a comment end of the same style
1961 as the comment sequence which began this comment
1962 section */
1963 break;
1964 else
1965 {
1966 INC_BOTH (from, from_byte);
1967 UPDATE_SYNTAX_TABLE_FORWARD (from);
1968 }
1969 }
95ff8dfc
RS
1970 if (nesting > 0
1971 && from < stop
1972 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1973 && (c1 = FETCH_CHAR (from_byte),
1974 SYNTAX_COMMENT_STYLE (c1) == style
abf8a9ff
SM
1975 && SYNTAX_COMSTART_SECOND (c1))
1976 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
1977 SYNTAX_COMMENT_NESTED (c1)))
95ff8dfc
RS
1978 /* we have encountered a nested comment of the same style
1979 as the comment sequence which began this comment
1980 section */
1981 {
1982 INC_BOTH (from, from_byte);
1983 UPDATE_SYNTAX_TABLE_FORWARD (from);
1984 nesting++;
1985 }
1986 }
1987 *charpos_ptr = from;
1988 *bytepos_ptr = from_byte;
1989 return 1;
1990}
1991
b3cfe0c8 1992DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
177c0ea7 1993 doc: /*
ee648c11 1994Move forward across up to COUNT comments. If COUNT is negative, move backward.
fdb82f93
PJ
1995Stop scanning if we find something other than a comment or whitespace.
1996Set point to where scanning stops.
ee648c11 1997If COUNT comments are found as expected, with nothing except whitespace
fdb82f93
PJ
1998between them, return t; otherwise return nil. */)
1999 (count)
840f481c 2000 Lisp_Object count;
b3cfe0c8 2001{
2312c580
SM
2002 register EMACS_INT from;
2003 EMACS_INT from_byte;
2004 register EMACS_INT stop;
8ea151b2 2005 register int c, c1;
b3cfe0c8
RS
2006 register enum syntaxcode code;
2007 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2008 int comnested = 0; /* whether the comment is nestable or not */
be720845 2009 int found;
2312c580
SM
2010 EMACS_INT count1;
2011 EMACS_INT out_charpos, out_bytepos;
95ff8dfc 2012 int dummy;
840f481c 2013
b7826503 2014 CHECK_NUMBER (count);
840f481c 2015 count1 = XINT (count);
195d1361 2016 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
2017
2018 immediate_quit = 1;
2019 QUIT;
2020
2021 from = PT;
6a140a74 2022 from_byte = PT_BYTE;
b3cfe0c8 2023
195d1361 2024 SETUP_SYNTAX_TABLE (from, count1);
840f481c 2025 while (count1 > 0)
b3cfe0c8 2026 {
04882296 2027 do
b3cfe0c8 2028 {
f902a008
RS
2029 int comstart_first;
2030
04882296
KH
2031 if (from == stop)
2032 {
ef316cf0 2033 SET_PT_BOTH (from, from_byte);
b7e6e612 2034 immediate_quit = 0;
04882296
KH
2035 return Qnil;
2036 }
6a140a74 2037 c = FETCH_CHAR (from_byte);
b3cfe0c8 2038 code = SYNTAX (c);
f902a008 2039 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2040 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2041 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 2042 INC_BOTH (from, from_byte);
f902a008 2043 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2044 if (from < stop && comstart_first
6a140a74 2045 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 2046 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 2047 {
7d0393cf 2048 /* We have encountered a comment start sequence and we
7fc8191e 2049 are ignoring all text inside comments. We must record
b3cfe0c8
RS
2050 the comment style this sequence begins so that later,
2051 only a comment end of the same style actually ends
7fc8191e 2052 the comment section. */
b3cfe0c8 2053 code = Scomment;
8ea151b2 2054 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 2055 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2056 INC_BOTH (from, from_byte);
95ff8dfc 2057 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 2058 }
04882296 2059 }
abf8a9ff 2060 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
6a140a74 2061
da71ab91
KH
2062 if (code == Scomment_fence)
2063 comstyle = ST_COMMENT_STYLE;
2064 else if (code != Scomment)
04882296
KH
2065 {
2066 immediate_quit = 0;
6a140a74 2067 DEC_BOTH (from, from_byte);
ef316cf0 2068 SET_PT_BOTH (from, from_byte);
04882296
KH
2069 return Qnil;
2070 }
2071 /* We're at the start of a comment. */
95ff8dfc
RS
2072 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2073 &out_charpos, &out_bytepos, &dummy);
2074 from = out_charpos; from_byte = out_bytepos;
2075 if (!found)
04882296 2076 {
95ff8dfc
RS
2077 immediate_quit = 0;
2078 SET_PT_BOTH (from, from_byte);
2079 return Qnil;
b3cfe0c8 2080 }
95ff8dfc
RS
2081 INC_BOTH (from, from_byte);
2082 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 2083 /* We have skipped one comment. */
840f481c 2084 count1--;
b3cfe0c8
RS
2085 }
2086
840f481c 2087 while (count1 < 0)
b3cfe0c8 2088 {
b9145dbb 2089 while (1)
b3cfe0c8 2090 {
c65adb44 2091 int quoted;
f902a008 2092
b9145dbb
RS
2093 if (from <= stop)
2094 {
6a140a74 2095 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
2096 immediate_quit = 0;
2097 return Qnil;
2098 }
b3cfe0c8 2099
6a140a74 2100 DEC_BOTH (from, from_byte);
f902a008 2101 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 2102 quoted = char_quoted (from, from_byte);
6a140a74 2103 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
2104 code = SYNTAX (c);
2105 comstyle = 0;
95ff8dfc 2106 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2107 if (code == Sendcomment)
2108 comstyle = SYNTAX_COMMENT_STYLE (c);
b3cfe0c8 2109 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 2110 && prev_char_comend_first (from, from_byte)
89c6809a 2111 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 2112 {
7fc8191e 2113 /* We must record the comment style encountered so that
b3cfe0c8 2114 later, we can match only the proper comment begin
7fc8191e 2115 sequence of the same style. */
6a140a74 2116 DEC_BOTH (from, from_byte);
f902a008
RS
2117 code = Sendcomment;
2118 /* Calling char_quoted, above, set up global syntax position
2119 at the new value of FROM. */
95ff8dfc
RS
2120 c1 = FETCH_CHAR (from_byte);
2121 comstyle = SYNTAX_COMMENT_STYLE (c1);
2122 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8
RS
2123 }
2124
195d1361
RS
2125 if (code == Scomment_fence)
2126 {
2127 /* Skip until first preceding unquoted comment_fence. */
6a140a74 2128 int found = 0, ini = from, ini_byte = from_byte;
7d0393cf 2129
6a140a74 2130 while (1)
195d1361 2131 {
6a140a74 2132 DEC_BOTH (from, from_byte);
195d1361 2133 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
2134 c = FETCH_CHAR (from_byte);
2135 if (SYNTAX (c) == Scomment_fence
7d0393cf 2136 && !char_quoted (from, from_byte))
195d1361 2137 {
7d0393cf 2138 found = 1;
195d1361
RS
2139 break;
2140 }
fcdd4585
SM
2141 else if (from == stop)
2142 break;
195d1361
RS
2143 }
2144 if (found == 0)
2145 {
2146 from = ini; /* Set point to ini + 1. */
6a140a74 2147 from_byte = ini_byte;
195d1361
RS
2148 goto leave;
2149 }
39c41ad4
SM
2150 else
2151 /* We have skipped one comment. */
2152 break;
195d1361
RS
2153 }
2154 else if (code == Sendcomment)
b3cfe0c8 2155 {
95ff8dfc 2156 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2157 &out_charpos, &out_bytepos);
1aa963c8
SM
2158 if (found == -1)
2159 {
abf8a9ff
SM
2160 if (c == '\n')
2161 /* This end-of-line is not an end-of-comment.
2162 Treat it like a whitespace.
2163 CC-mode (and maybe others) relies on this behavior. */
2164 ;
2165 else
2166 {
2167 /* Failure: we should go back to the end of this
2168 not-quite-endcomment. */
2169 if (SYNTAX(c) != code)
2170 /* It was a two-char Sendcomment. */
2171 INC_BOTH (from, from_byte);
2172 goto leave;
2173 }
1aa963c8 2174 }
2b927d02 2175 else
abf8a9ff
SM
2176 {
2177 /* We have skipped one comment. */
2178 from = out_charpos, from_byte = out_bytepos;
2179 break;
2180 }
b3cfe0c8 2181 }
bb0de084 2182 else if (code != Swhitespace || quoted)
b3cfe0c8 2183 {
195d1361 2184 leave:
b3cfe0c8 2185 immediate_quit = 0;
6a140a74 2186 INC_BOTH (from, from_byte);
ef316cf0 2187 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2188 return Qnil;
2189 }
2190 }
2191
840f481c 2192 count1++;
b3cfe0c8
RS
2193 }
2194
ef316cf0 2195 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
2196 immediate_quit = 0;
2197 return Qt;
2198}
2199\f
bd25db08 2200/* Return syntax code of character C if C is a single byte character
db9f48c3 2201 or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol. */
bd25db08
KH
2202
2203#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
2204 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
2205 ? SYNTAX (c) : Ssymbol)
2206
6a140a74 2207static Lisp_Object
8489eb67 2208scan_lists (from, count, depth, sexpflag)
932e6895
SM
2209 register EMACS_INT from;
2210 EMACS_INT count, depth;
2211 int sexpflag;
8489eb67
RS
2212{
2213 Lisp_Object val;
932e6895 2214 register EMACS_INT stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2215 register int c, c1;
2216 int stringterm;
8489eb67
RS
2217 int quoted;
2218 int mathexit = 0;
93da5fff 2219 register enum syntaxcode code, temp_code;
195d1361 2220 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2221 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2222 int comnested = 0; /* whether the comment is nestable or not */
932e6895
SM
2223 EMACS_INT temp_pos;
2224 EMACS_INT last_good = from;
195d1361 2225 int found;
932e6895
SM
2226 EMACS_INT from_byte;
2227 EMACS_INT out_bytepos, out_charpos;
95ff8dfc 2228 int temp, dummy;
bd25db08 2229 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2230
2231 if (depth > 0) min_depth = 0;
2232
c3907a7e
RS
2233 if (from > ZV) from = ZV;
2234 if (from < BEGV) from = BEGV;
2235
2236 from_byte = CHAR_TO_BYTE (from);
2237
8489eb67
RS
2238 immediate_quit = 1;
2239 QUIT;
2240
195d1361 2241 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2242 while (count > 0)
2243 {
8489eb67
RS
2244 while (from < stop)
2245 {
f902a008 2246 int comstart_first, prefix;
195d1361 2247 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2248 c = FETCH_CHAR (from_byte);
bd25db08 2249 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2250 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2251 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2252 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2253 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2254 if (depth == min_depth)
2255 last_good = from;
6a140a74 2256 INC_BOTH (from, from_byte);
195d1361 2257 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2258 if (from < stop && comstart_first
ab229fdd 2259 && (c = FETCH_CHAR (from_byte), SYNTAX_COMSTART_SECOND (c))
8489eb67 2260 && parse_sexp_ignore_comments)
e5d4f4dc 2261 {
7d0393cf 2262 /* we have encountered a comment start sequence and we
195d1361 2263 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2264 the comment style this sequence begins so that later,
2265 only a comment end of the same style actually ends
2266 the comment section */
2267 code = Scomment;
95ff8dfc
RS
2268 c1 = FETCH_CHAR (from_byte);
2269 comstyle = SYNTAX_COMMENT_STYLE (c1);
2270 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2271 INC_BOTH (from, from_byte);
f902a008 2272 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc 2273 }
7d0393cf 2274
f902a008 2275 if (prefix)
8489eb67
RS
2276 continue;
2277
0220c518 2278 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2279 {
2280 case Sescape:
2281 case Scharquote:
2282 if (from == stop) goto lose;
6a140a74 2283 INC_BOTH (from, from_byte);
8489eb67
RS
2284 /* treat following character as a word constituent */
2285 case Sword:
2286 case Ssymbol:
2287 if (depth || !sexpflag) break;
195d1361 2288 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2289 while (from < stop)
2290 {
195d1361 2291 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2292
2293 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2294 c = FETCH_CHAR (from_byte);
2295 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2296 switch (temp)
8489eb67
RS
2297 {
2298 case Scharquote:
2299 case Sescape:
6a140a74 2300 INC_BOTH (from, from_byte);
8489eb67
RS
2301 if (from == stop) goto lose;
2302 break;
2303 case Sword:
2304 case Ssymbol:
2305 case Squote:
2306 break;
2307 default:
2308 goto done;
2309 }
6a140a74 2310 INC_BOTH (from, from_byte);
8489eb67
RS
2311 }
2312 goto done;
2313
195d1361 2314 case Scomment_fence:
95ff8dfc
RS
2315 comstyle = ST_COMMENT_STYLE;
2316 /* FALLTHROUGH */
2317 case Scomment:
8489eb67 2318 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2319 UPDATE_SYNTAX_TABLE_FORWARD (from);
2320 found = forw_comment (from, from_byte, stop,
2321 comnested, comstyle, 0,
2322 &out_charpos, &out_bytepos, &dummy);
2323 from = out_charpos, from_byte = out_bytepos;
2324 if (!found)
8489eb67 2325 {
95ff8dfc
RS
2326 if (depth == 0)
2327 goto done;
2328 goto lose;
8489eb67 2329 }
95ff8dfc
RS
2330 INC_BOTH (from, from_byte);
2331 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2332 break;
2333
2334 case Smath:
2335 if (!sexpflag)
2336 break;
6a140a74
RS
2337 if (from != stop && c == FETCH_CHAR (from_byte))
2338 {
2339 INC_BOTH (from, from_byte);
2340 }
8489eb67
RS
2341 if (mathexit)
2342 {
2343 mathexit = 0;
2344 goto close1;
2345 }
2346 mathexit = 1;
2347
2348 case Sopen:
2349 if (!++depth) goto done;
2350 break;
2351
2352 case Sclose:
2353 close1:
2354 if (!--depth) goto done;
2355 if (depth < min_depth)
9471945b
KS
2356 xsignal3 (Qscan_error,
2357 build_string ("Containing expression ends prematurely"),
2358 make_number (last_good), make_number (from));
8489eb67
RS
2359 break;
2360
2361 case Sstring:
195d1361 2362 case Sstring_fence:
ef316cf0 2363 temp_pos = dec_bytepos (from_byte);
93da5fff 2364 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2365 while (1)
2366 {
2367 if (from >= stop) goto lose;
195d1361 2368 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2369 c = FETCH_CHAR (from_byte);
42ebfa31
SM
2370 if (code == Sstring
2371 ? (c == stringterm
2372 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
bd25db08 2373 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2374 break;
7c7ca3d2
RS
2375
2376 /* Some compilers can't handle this inside the switch. */
bd25db08 2377 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2378 switch (temp)
8489eb67
RS
2379 {
2380 case Scharquote:
2381 case Sescape:
6a140a74 2382 INC_BOTH (from, from_byte);
8489eb67 2383 }
6a140a74 2384 INC_BOTH (from, from_byte);
8489eb67 2385 }
6a140a74 2386 INC_BOTH (from, from_byte);
8489eb67
RS
2387 if (!depth && sexpflag) goto done;
2388 break;
240c43e8
SM
2389 default:
2390 /* Ignore whitespace, punctuation, quote, endcomment. */
2391 break;
8489eb67
RS
2392 }
2393 }
2394
2395 /* Reached end of buffer. Error if within object, return nil if between */
2396 if (depth) goto lose;
2397
2398 immediate_quit = 0;
2399 return Qnil;
2400
2401 /* End of object reached */
2402 done:
2403 count--;
2404 }
2405
2406 while (count < 0)
2407 {
8489eb67
RS
2408 while (from > stop)
2409 {
6a140a74 2410 DEC_BOTH (from, from_byte);
195d1361 2411 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2412 c = FETCH_CHAR (from_byte);
bd25db08 2413 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2414 if (depth == min_depth)
2415 last_good = from;
7fc8191e 2416 comstyle = 0;
95ff8dfc 2417 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2418 if (code == Sendcomment)
2419 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2420 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2421 && prev_char_comend_first (from, from_byte)
8489eb67 2422 && parse_sexp_ignore_comments)
e5d4f4dc 2423 {
f902a008 2424 /* We must record the comment style encountered so that
e5d4f4dc 2425 later, we can match only the proper comment begin
f902a008 2426 sequence of the same style. */
6a140a74 2427 DEC_BOTH (from, from_byte);
f902a008
RS
2428 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2429 code = Sendcomment;
95ff8dfc
RS
2430 c1 = FETCH_CHAR (from_byte);
2431 comstyle = SYNTAX_COMMENT_STYLE (c1);
2432 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc 2433 }
7d0393cf 2434
9828a477 2435 /* Quoting turns anything except a comment-ender
e6365855 2436 into a word character. Note that this cannot be true
f902a008 2437 if we decremented FROM in the if-statement above. */
9828a477 2438 if (code != Sendcomment && char_quoted (from, from_byte))
240c43e8
SM
2439 {
2440 DEC_BOTH (from, from_byte);
2441 code = Sword;
2442 }
9828a477 2443 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2444 continue;
2445
9828a477 2446 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2447 {
2448 case Sword:
2449 case Ssymbol:
9828a477
KH
2450 case Sescape:
2451 case Scharquote:
8489eb67 2452 if (depth || !sexpflag) break;
195d1361
RS
2453 /* This word counts as a sexp; count object finished
2454 after passing it. */
8489eb67
RS
2455 while (from > stop)
2456 {
6a140a74 2457 temp_pos = from_byte;
ef316cf0
RS
2458 if (! NILP (current_buffer->enable_multibyte_characters))
2459 DEC_POS (temp_pos);
2460 else
2461 temp_pos--;
6a140a74 2462 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2463 c1 = FETCH_CHAR (temp_pos);
bd25db08 2464 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2465 /* Don't allow comment-end to be quoted. */
2466 if (temp_code == Sendcomment)
2467 goto done2;
6a140a74 2468 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2469 if (quoted)
93da5fff 2470 {
6a140a74 2471 DEC_BOTH (from, from_byte);
ef316cf0 2472 temp_pos = dec_bytepos (temp_pos);
6a140a74 2473 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2474 }
2475 c1 = FETCH_CHAR (temp_pos);
bd25db08 2476 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2477 if (! (quoted || temp_code == Sword
2478 || temp_code == Ssymbol
2479 || temp_code == Squote))
8489eb67 2480 goto done2;
6a140a74 2481 DEC_BOTH (from, from_byte);
8489eb67
RS
2482 }
2483 goto done2;
2484
2485 case Smath:
2486 if (!sexpflag)
2487 break;
ef316cf0 2488 temp_pos = dec_bytepos (from_byte);
6a140a74 2489 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2490 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2491 DEC_BOTH (from, from_byte);
8489eb67
RS
2492 if (mathexit)
2493 {
2494 mathexit = 0;
2495 goto open2;
2496 }
2497 mathexit = 1;
2498
2499 case Sclose:
2500 if (!++depth) goto done2;
2501 break;
2502
2503 case Sopen:
2504 open2:
2505 if (!--depth) goto done2;
2506 if (depth < min_depth)
9471945b
KS
2507 xsignal3 (Qscan_error,
2508 build_string ("Containing expression ends prematurely"),
2509 make_number (last_good), make_number (from));
8489eb67
RS
2510 break;
2511
2512 case Sendcomment:
2513 if (!parse_sexp_ignore_comments)
2514 break;
95ff8dfc 2515 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2516 &out_charpos, &out_bytepos);
1aa963c8
SM
2517 /* FIXME: if found == -1, then it really wasn't a comment-end.
2518 For single-char Sendcomment, we can't do much about it apart
2519 from skipping the char.
2520 For 2-char endcomments, we could try again, taking both
2521 chars as separate entities, but it's a lot of trouble
2522 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2523 if (found != -1)
2524 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2525 break;
2526
195d1361
RS
2527 case Scomment_fence:
2528 case Sstring_fence:
2529 while (1)
2530 {
195d1361 2531 if (from == stop) goto lose;
d0abdf7e 2532 DEC_BOTH (from, from_byte);
195d1361 2533 UPDATE_SYNTAX_TABLE_BACKWARD (from);
7d0393cf 2534 if (!char_quoted (from, from_byte)
bd25db08
KH
2535 && (c = FETCH_CHAR (from_byte),
2536 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2537 break;
2538 }
2539 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2540 break;
7d0393cf 2541
8489eb67 2542 case Sstring:
6a140a74 2543 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2544 while (1)
2545 {
2546 if (from == stop) goto lose;
d0abdf7e
SM
2547 DEC_BOTH (from, from_byte);
2548 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2549 if (!char_quoted (from, from_byte)
2550 && stringterm == (c = FETCH_CHAR (from_byte))
42ebfa31 2551 && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
8489eb67 2552 break;
8489eb67 2553 }
8489eb67
RS
2554 if (!depth && sexpflag) goto done2;
2555 break;
240c43e8
SM
2556 default:
2557 /* Ignore whitespace, punctuation, quote, endcomment. */
2558 break;
8489eb67
RS
2559 }
2560 }
2561
2562 /* Reached start of buffer. Error if within object, return nil if between */
2563 if (depth) goto lose;
2564
2565 immediate_quit = 0;
2566 return Qnil;
2567
2568 done2:
2569 count++;
2570 }
2571
2572
2573 immediate_quit = 0;
1e142fb7 2574 XSETFASTINT (val, from);
8489eb67
RS
2575 return val;
2576
2577 lose:
9471945b
KS
2578 xsignal3 (Qscan_error,
2579 build_string ("Unbalanced parentheses"),
2580 make_number (last_good), make_number (from));
8489eb67
RS
2581}
2582
8489eb67 2583DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
fdb82f93
PJ
2584 doc: /* Scan from character number FROM by COUNT lists.
2585Returns the character number of the position thus found.
2586
2587If DEPTH is nonzero, paren depth begins counting from that value,
2588only places where the depth in parentheses becomes zero
2589are candidates for stopping; COUNT such places are counted.
2590Thus, a positive value for DEPTH means go out levels.
2591
2592Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2593
2594If the beginning or end of (the accessible part of) the buffer is reached
2595and the depth is wrong, an error is signaled.
2596If the depth is right but the count is not used up, nil is returned. */)
2597 (from, count, depth)
8489eb67
RS
2598 Lisp_Object from, count, depth;
2599{
b7826503
PJ
2600 CHECK_NUMBER (from);
2601 CHECK_NUMBER (count);
2602 CHECK_NUMBER (depth);
8489eb67
RS
2603
2604 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2605}
2606
2607DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
fdb82f93
PJ
2608 doc: /* Scan from character number FROM by COUNT balanced expressions.
2609If COUNT is negative, scan backwards.
2610Returns the character number of the position thus found.
2611
2612Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2613
2614If the beginning or end of (the accessible part of) the buffer is reached
2615in the middle of a parenthetical grouping, an error is signaled.
2616If the beginning or end is reached between groupings
2617but before count is used up, nil is returned. */)
2618 (from, count)
8489eb67
RS
2619 Lisp_Object from, count;
2620{
b7826503
PJ
2621 CHECK_NUMBER (from);
2622 CHECK_NUMBER (count);
8489eb67
RS
2623
2624 return scan_lists (XINT (from), XINT (count), 0, 1);
2625}
2626
2627DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
fdb82f93
PJ
2628 0, 0, 0,
2629 doc: /* Move point backward over any number of chars with prefix syntax.
2630This includes chars with "quote" or "prefix" syntax (' or p). */)
2631 ()
8489eb67
RS
2632{
2633 int beg = BEGV;
6a140a74
RS
2634 int opoint = PT;
2635 int opoint_byte = PT_BYTE;
6ec8bbd2 2636 int pos = PT;
6a140a74 2637 int pos_byte = PT_BYTE;
93da5fff 2638 int c;
93da5fff 2639
7d0393cf 2640 if (pos <= beg)
195d1361 2641 {
f902a008
RS
2642 SET_PT_BOTH (opoint, opoint_byte);
2643
2644 return Qnil;
195d1361 2645 }
8489eb67 2646
f902a008
RS
2647 SETUP_SYNTAX_TABLE (pos, -1);
2648
6a140a74
RS
2649 DEC_BOTH (pos, pos_byte);
2650
1fd3172d 2651 while (!char_quoted (pos, pos_byte)
195d1361 2652 /* Previous statement updates syntax table. */
6a140a74 2653 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2654 || SYNTAX_PREFIX (c)))
2655 {
1fd3172d
RS
2656 opoint = pos;
2657 opoint_byte = pos_byte;
2658
2659 if (pos + 1 > beg)
2660 DEC_BOTH (pos, pos_byte);
93da5fff 2661 }
8489eb67 2662
6a140a74 2663 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2664
2665 return Qnil;
2666}
2667\f
6a140a74 2668/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2669 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2670 and return a description of the state of the parse at END.
c81a3712 2671 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2672 If COMMENTSTOP is 1, stop at the start of a comment.
2673 If COMMENTSTOP is -1, stop at the start or end of a comment,
2674 after the beginning of a string, or after the end of a string. */
8489eb67 2675
340f92b5 2676static void
6a140a74 2677scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2678 stopbefore, oldstate, commentstop)
e5d4f4dc 2679 struct lisp_parse_state *stateptr;
8489eb67 2680 register int from;
ddc447fc 2681 int end, targetdepth, stopbefore, from_byte;
8489eb67 2682 Lisp_Object oldstate;
c81a3712 2683 int commentstop;
8489eb67
RS
2684{
2685 struct lisp_parse_state state;
2686
2687 register enum syntaxcode code;
95ff8dfc
RS
2688 int c1;
2689 int comnested;
8489eb67
RS
2690 struct level { int last, prev; };
2691 struct level levelstart[100];
2692 register struct level *curlevel = levelstart;
2693 struct level *endlevel = levelstart + 100;
8489eb67
RS
2694 register int depth; /* Paren depth of current scanning location.
2695 level - levelstart equals this except
2696 when the depth becomes negative. */
2697 int mindepth; /* Lowest DEPTH value seen. */
2698 int start_quoted = 0; /* Nonzero means starting after a char quote */
2699 Lisp_Object tem;
93da5fff 2700 int prev_from; /* Keep one character before FROM. */
6a140a74 2701 int prev_from_byte;
1fd3172d 2702 int prev_from_syntax;
195d1361
RS
2703 int boundary_stop = commentstop == -1;
2704 int nofence;
95ff8dfc 2705 int found;
60c86a83 2706 EMACS_INT out_bytepos, out_charpos;
7c7ca3d2 2707 int temp;
93da5fff
KH
2708
2709 prev_from = from;
6a140a74
RS
2710 prev_from_byte = from_byte;
2711 if (from != BEGV)
2712 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2713
2714 /* Use this macro instead of `from++'. */
6a140a74
RS
2715#define INC_FROM \
2716do { prev_from = from; \
2717 prev_from_byte = from_byte; \
ab229fdd
AS
2718 temp = FETCH_CHAR (prev_from_byte); \
2719 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
ef316cf0 2720 INC_BOTH (from, from_byte); \
3ceb4629
SM
2721 if (from < end) \
2722 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2723 } while (0)
8489eb67
RS
2724
2725 immediate_quit = 1;
2726 QUIT;
2727
265a9e55 2728 if (NILP (oldstate))
8489eb67
RS
2729 {
2730 depth = 0;
2731 state.instring = -1;
2732 state.incomment = 0;
195d1361
RS
2733 state.comstyle = 0; /* comment style a by default. */
2734 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2735 }
2736 else
2737 {
2738 tem = Fcar (oldstate);
265a9e55 2739 if (!NILP (tem))
8489eb67
RS
2740 depth = XINT (tem);
2741 else
2742 depth = 0;
2743
2744 oldstate = Fcdr (oldstate);
2745 oldstate = Fcdr (oldstate);
2746 oldstate = Fcdr (oldstate);
2747 tem = Fcar (oldstate);
195d1361 2748 /* Check whether we are inside string_fence-style string: */
7d0393cf
JB
2749 state.instring = (!NILP (tem)
2750 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
e76f1c44 2751 : -1);
8489eb67
RS
2752
2753 oldstate = Fcdr (oldstate);
2754 tem = Fcar (oldstate);
e76f1c44
GM
2755 state.incomment = (!NILP (tem)
2756 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2757 : 0);
8489eb67
RS
2758
2759 oldstate = Fcdr (oldstate);
2760 tem = Fcar (oldstate);
265a9e55 2761 start_quoted = !NILP (tem);
e5d4f4dc 2762
95ff8dfc 2763 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2764 style a. If it is non-nil, we are in comment style b */
2765 oldstate = Fcdr (oldstate);
e5d4f4dc 2766 oldstate = Fcdr (oldstate);
195d1361 2767 tem = Fcar (oldstate);
7d0393cf 2768 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
e76f1c44 2769 ? ST_COMMENT_STYLE : 1);
195d1361 2770
e5d4f4dc 2771 oldstate = Fcdr (oldstate);
e5d4f4dc 2772 tem = Fcar (oldstate);
195d1361 2773 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2774 oldstate = Fcdr (oldstate);
2775 tem = Fcar (oldstate);
2776 while (!NILP (tem)) /* >= second enclosing sexps. */
2777 {
2778 /* curlevel++->last ran into compiler bug on Apollo */
2779 curlevel->last = XINT (Fcar (tem));
2780 if (++curlevel == endlevel)
02010917 2781 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2782 curlevel->prev = -1;
2783 curlevel->last = -1;
2784 tem = Fcdr (tem);
2785 }
8489eb67
RS
2786 }
2787 state.quoted = 0;
2788 mindepth = depth;
2789
2790 curlevel->prev = -1;
2791 curlevel->last = -1;
2792
326b283b 2793 SETUP_SYNTAX_TABLE (prev_from, 1);
ab229fdd
AS
2794 temp = FETCH_CHAR (prev_from_byte);
2795 prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3ceb4629 2796 UPDATE_SYNTAX_TABLE_FORWARD (from);
326b283b 2797
195d1361 2798 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2799
326b283b
RS
2800 if (state.incomment)
2801 goto startincomment;
8489eb67
RS
2802 if (state.instring >= 0)
2803 {
195d1361 2804 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2805 if (start_quoted)
2806 goto startquotedinstring;
8489eb67
RS
2807 goto startinstring;
2808 }
326b283b
RS
2809 else if (start_quoted)
2810 goto startquoted;
1fd3172d 2811
8489eb67
RS
2812 while (from < end)
2813 {
93da5fff 2814 INC_FROM;
1fd3172d 2815 code = prev_from_syntax & 0xff;
4c920633 2816
c5d4b96f
SM
2817 if (from < end
2818 && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
2819 && (c1 = FETCH_CHAR (from_byte),
2820 SYNTAX_COMSTART_SECOND (c1)))
2821 /* Duplicate code to avoid a complex if-expression
2822 which causes trouble for the SGI compiler. */
95ff8dfc 2823 {
c5d4b96f
SM
2824 /* Record the comment style we have entered so that only
2825 the comment-end sequence of the same style actually
2826 terminates the comment section. */
2827 state.comstyle = SYNTAX_COMMENT_STYLE (c1);
2828 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2829 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2830 state.incomment = comnested ? 1 : -1;
95ff8dfc 2831 state.comstr_start = prev_from;
c5d4b96f
SM
2832 INC_FROM;
2833 code = Scomment;
95ff8dfc 2834 }
4c920633 2835 else if (code == Scomment_fence)
e5d4f4dc
RS
2836 {
2837 /* Record the comment style we have entered so that only
2838 the comment-end sequence of the same style actually
2839 terminates the comment section. */
95ff8dfc
RS
2840 state.comstyle = ST_COMMENT_STYLE;
2841 state.incomment = -1;
195d1361 2842 state.comstr_start = prev_from;
e5d4f4dc 2843 code = Scomment;
e5d4f4dc 2844 }
c5d4b96f
SM
2845 else if (code == Scomment)
2846 {
2847 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
2848 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2849 1 : -1);
2850 state.comstr_start = prev_from;
2851 }
e5d4f4dc 2852
1fd3172d 2853 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2854 continue;
0220c518 2855 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2856 {
2857 case Sescape:
2858 case Scharquote:
2859 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2860 curlevel->last = prev_from;
8489eb67
RS
2861 startquoted:
2862 if (from == end) goto endquoted;
93da5fff 2863 INC_FROM;
8489eb67
RS
2864 goto symstarted;
2865 /* treat following character as a word constituent */
2866 case Sword:
2867 case Ssymbol:
2868 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2869 curlevel->last = prev_from;
8489eb67
RS
2870 symstarted:
2871 while (from < end)
2872 {
7c7ca3d2 2873 /* Some compilers can't handle this inside the switch. */
ab229fdd
AS
2874 temp = FETCH_CHAR (from_byte);
2875 temp = SYNTAX (temp);
7c7ca3d2 2876 switch (temp)
8489eb67
RS
2877 {
2878 case Scharquote:
2879 case Sescape:
93da5fff 2880 INC_FROM;
8489eb67
RS
2881 if (from == end) goto endquoted;
2882 break;
2883 case Sword:
2884 case Ssymbol:
2885 case Squote:
2886 break;
2887 default:
2888 goto symdone;
2889 }
93da5fff 2890 INC_FROM;
8489eb67
RS
2891 }
2892 symdone:
2893 curlevel->prev = curlevel->last;
2894 break;
2895
240c43e8 2896 case Scomment_fence: /* Can't happen because it's handled above. */
8489eb67 2897 case Scomment:
195d1361 2898 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2899 startincomment:
2900 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2901 that we find a 2-char comment ender even if we start in the
e6365855 2902 middle of it. We don't want to do that if we're just at the
d355bd8a 2903 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2904 found = forw_comment (from, from_byte, end,
2905 state.incomment, state.comstyle,
e6365855
SM
2906 (from == BEGV || from < state.comstr_start + 3)
2907 ? 0 : prev_from_syntax,
95ff8dfc
RS
2908 &out_charpos, &out_bytepos, &state.incomment);
2909 from = out_charpos; from_byte = out_bytepos;
2910 /* Beware! prev_from and friends are invalid now.
2911 Luckily, the `done' doesn't use them and the INC_FROM
2912 sets them to a sane value without looking at them. */
2913 if (!found) goto done;
d355bd8a 2914 INC_FROM;
8489eb67 2915 state.incomment = 0;
e5d4f4dc 2916 state.comstyle = 0; /* reset the comment style */
195d1361 2917 if (boundary_stop) goto done;
8489eb67
RS
2918 break;
2919
2920 case Sopen:
2921 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2922 depth++;
2923 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2924 curlevel->last = prev_from;
8489eb67 2925 if (++curlevel == endlevel)
02010917 2926 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2927 curlevel->prev = -1;
2928 curlevel->last = -1;
30844415 2929 if (targetdepth == depth) goto done;
8489eb67
RS
2930 break;
2931
2932 case Sclose:
2933 depth--;
2934 if (depth < mindepth)
2935 mindepth = depth;
2936 if (curlevel != levelstart)
2937 curlevel--;
2938 curlevel->prev = curlevel->last;
30844415 2939 if (targetdepth == depth) goto done;
8489eb67
RS
2940 break;
2941
2942 case Sstring:
195d1361
RS
2943 case Sstring_fence:
2944 state.comstr_start = from - 1;
8489eb67 2945 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2946 curlevel->last = prev_from;
7d0393cf 2947 state.instring = (code == Sstring
6a140a74 2948 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2949 : ST_STRING_STYLE);
2950 if (boundary_stop) goto done;
8489eb67 2951 startinstring:
195d1361 2952 {
644ea4df 2953 nofence = state.instring != ST_STRING_STYLE;
7d0393cf 2954
644ea4df
RS
2955 while (1)
2956 {
2957 int c;
195d1361 2958
644ea4df 2959 if (from >= end) goto done;
6a140a74 2960 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2961 /* Some compilers can't handle this inside the switch. */
2962 temp = SYNTAX (c);
ccf89641
KH
2963
2964 /* Check TEMP here so that if the char has
2965 a syntax-table property which says it is NOT
2966 a string character, it does not end the string. */
2967 if (nofence && c == state.instring && temp == Sstring)
2968 break;
2969
7c7ca3d2 2970 switch (temp)
644ea4df
RS
2971 {
2972 case Sstring_fence:
2973 if (!nofence) goto string_end;
2974 break;
2975 case Scharquote:
2976 case Sescape:
2977 INC_FROM;
2978 startquotedinstring:
2979 if (from >= end) goto endquoted;
195d1361 2980 }
644ea4df
RS
2981 INC_FROM;
2982 }
195d1361
RS
2983 }
2984 string_end:
8489eb67
RS
2985 state.instring = -1;
2986 curlevel->prev = curlevel->last;
93da5fff 2987 INC_FROM;
195d1361 2988 if (boundary_stop) goto done;
8489eb67
RS
2989 break;
2990
2991 case Smath:
240c43e8
SM
2992 /* FIXME: We should do something with it. */
2993 break;
2994 default:
2995 /* Ignore whitespace, punctuation, quote, endcomment. */
8489eb67
RS
2996 break;
2997 }
2998 }
2999 goto done;
3000
3001 stop: /* Here if stopping before start of sexp. */
93da5fff 3002 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
3003 goto done; /* but return the position before it. */
3004
3005 endquoted:
3006 state.quoted = 1;
3007 done:
3008 state.depth = depth;
3009 state.mindepth = mindepth;
3010 state.thislevelstart = curlevel->prev;
3011 state.prevlevelstart
3012 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3013 state.location = from;
1a102a58
RS
3014 state.levelstarts = Qnil;
3015 while (--curlevel >= levelstart)
3016 state.levelstarts = Fcons (make_number (curlevel->last),
3017 state.levelstarts);
8489eb67
RS
3018 immediate_quit = 0;
3019
e5d4f4dc 3020 *stateptr = state;
8489eb67
RS
3021}
3022
c81a3712 3023DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
fdb82f93
PJ
3024 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3025Parsing stops at TO or when certain criteria are met;
3026 point is set to where parsing stops.
3027If fifth arg OLDSTATE is omitted or nil,
3028 parsing assumes that FROM is the beginning of a function.
0e6228bc 3029Value is a list of elements describing final state of parsing:
fdb82f93
PJ
3030 0. depth in parens.
3031 1. character address of start of innermost containing list; nil if none.
3032 2. character address of start of last complete sexp terminated.
3033 3. non-nil if inside a string.
3034 (it is the character that will terminate the string,
3035 or t if the string should be terminated by a generic string delimiter.)
7d0393cf 3036 4. nil if outside a comment, t if inside a non-nestable comment,
fdb82f93
PJ
3037 else an integer (the current comment nesting).
3038 5. t if following a quote character.
3039 6. the minimum paren-depth encountered during this scan.
3040 7. t if in a comment of style b; symbol `syntax-table' if the comment
3041 should be terminated by a generic comment delimiter.
3042 8. character address of start of comment or string; nil if not in one.
3043 9. Intermediate data for continuation of parsing (subject to change).
3044If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3045in parentheses becomes equal to TARGETDEPTH.
3046Fourth arg STOPBEFORE non-nil means stop when come to
3047 any character that starts a sexp.
0e6228bc 3048Fifth arg OLDSTATE is a list like what this function returns.
fdb82f93 3049 It is used to initialize the state of the parse. Elements number 1, 2, 6
43726c05 3050 and 8 are ignored.
fdb82f93
PJ
3051Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3052 If it is symbol `syntax-table', stop after the start of a comment or a
3053 string, or after end of a comment or a string. */)
3054 (from, to, targetdepth, stopbefore, oldstate, commentstop)
c81a3712 3055 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
3056{
3057 struct lisp_parse_state state;
3058 int target;
3059
265a9e55 3060 if (!NILP (targetdepth))
8489eb67 3061 {
b7826503 3062 CHECK_NUMBER (targetdepth);
8489eb67
RS
3063 target = XINT (targetdepth);
3064 }
3065 else
3066 target = -100000; /* We won't reach this depth */
3067
3068 validate_region (&from, &to);
6a140a74
RS
3069 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3070 XINT (to),
c81a3712 3071 target, !NILP (stopbefore), oldstate,
7d0393cf 3072 (NILP (commentstop)
195d1361 3073 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
3074
3075 SET_PT (state.location);
7d0393cf 3076
8489eb67
RS
3077 return Fcons (make_number (state.depth),
3078 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3079 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
7d0393cf
JB
3080 Fcons (state.instring >= 0
3081 ? (state.instring == ST_STRING_STYLE
195d1361 3082 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
3083 Fcons (state.incomment < 0 ? Qt :
3084 (state.incomment == 0 ? Qnil :
3085 make_number (state.incomment)),
8489eb67 3086 Fcons (state.quoted ? Qt : Qnil,
bff37d2a 3087 Fcons (make_number (state.mindepth),
7d0393cf 3088 Fcons ((state.comstyle
bff37d2a
RS
3089 ? (state.comstyle == ST_COMMENT_STYLE
3090 ? Qsyntax_table : Qt) :
3091 Qnil),
0beaf54f
DL
3092 Fcons (((state.incomment
3093 || (state.instring >= 0))
bff37d2a
RS
3094 ? make_number (state.comstr_start)
3095 : Qnil),
1a102a58 3096 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
3097}
3098\f
dfcf069d 3099void
8489eb67
RS
3100init_syntax_once ()
3101{
78f9a1f7 3102 register int i, c;
8ea151b2 3103 Lisp_Object temp;
8489eb67 3104
5ebaddf5
RS
3105 /* This has to be done here, before we call Fmake_char_table. */
3106 Qsyntax_table = intern ("syntax-table");
3107 staticpro (&Qsyntax_table);
3108
3109 /* Intern this now in case it isn't already done.
3110 Setting this variable twice is harmless.
3111 But don't staticpro it here--that is done in alloc.c. */
3112 Qchar_table_extra_slots = intern ("char-table-extra-slots");
3113
93da5fff 3114 /* Create objects which can be shared among syntax tables. */
42ebfa31 3115 Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
93da5fff
KH
3116 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3117 XVECTOR (Vsyntax_code_object)->contents[i]
3118 = Fcons (make_number (i), Qnil);
3119
5ebaddf5
RS
3120 /* Now we are ready to set up this property, so we can
3121 create syntax tables. */
3122 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3123
93da5fff 3124 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 3125
5ebaddf5 3126 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 3127
aa7b08b4
RS
3128 /* Control characters should not be whitespace. */
3129 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3130 for (i = 0; i <= ' ' - 1; i++)
3131 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3132 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3133
3134 /* Except that a few really are whitespace. */
3135 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3136 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3137 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3138 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3139 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3140 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3141
93da5fff 3142 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 3143 for (i = 'a'; i <= 'z'; i++)
8ea151b2 3144 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3145 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 3146 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 3147 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
3148 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3149
3150 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3151 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3152
3153 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3154 Fcons (make_number (Sopen), make_number (')')));
3155 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3156 Fcons (make_number (Sclose), make_number ('(')));
3157 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3158 Fcons (make_number (Sopen), make_number (']')));
3159 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3160 Fcons (make_number (Sclose), make_number ('[')));
3161 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3162 Fcons (make_number (Sopen), make_number ('}')));
3163 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3164 Fcons (make_number (Sclose), make_number ('{')));
3165 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3166 Fcons (make_number ((int) Sstring), Qnil));
3167 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3168 Fcons (make_number ((int) Sescape), Qnil));
3169
93da5fff 3170 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 3171 for (i = 0; i < 10; i++)
78f9a1f7
KH
3172 {
3173 c = "_-+*/&|<>="[i];
3174 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3175 }
8489eb67 3176
93da5fff 3177 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 3178 for (i = 0; i < 12; i++)
78f9a1f7
KH
3179 {
3180 c = ".,;:?!#@~^'`"[i];
3181 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3182 }
bd25db08
KH
3183
3184 /* All multibyte characters have syntax `word' by default. */
3185 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3186 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
3187 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
3188}
3189
dfcf069d 3190void
8489eb67
RS
3191syms_of_syntax ()
3192{
3193 Qsyntax_table_p = intern ("syntax-table-p");
3194 staticpro (&Qsyntax_table_p);
3195
93da5fff
KH
3196 staticpro (&Vsyntax_code_object);
3197
a2994c46
KS
3198 staticpro (&gl_state.object);
3199 staticpro (&gl_state.global_code);
3200 staticpro (&gl_state.current_syntax_table);
3201 staticpro (&gl_state.old_prop);
3202
3203 /* Defined in regex.c */
3204 staticpro (&re_match_object);
3205
7bf5e9e4
RS
3206 Qscan_error = intern ("scan-error");
3207 staticpro (&Qscan_error);
3208 Fput (Qscan_error, Qerror_conditions,
f3be100f 3209 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
3210 Fput (Qscan_error, Qerror_message,
3211 build_string ("Scan error"));
3212
8489eb67 3213 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
fdb82f93 3214 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
8489eb67 3215
195d1361 3216 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
fdb82f93
PJ
3217 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3218Otherwise, that text property is simply ignored.
3219See the info node `(elisp)Syntax Properties' for a description of the
3220`syntax-table' property. */);
195d1361 3221
8489eb67
RS
3222 words_include_escapes = 0;
3223 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
fdb82f93 3224 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
8489eb67 3225
bd25db08 3226 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
fdb82f93 3227 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
bd25db08
KH
3228 multibyte_syntax_as_symbol = 0;
3229
f4ed767f
GM
3230 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3231 &open_paren_in_column_0_is_defun_start,
b222e415 3232 doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun. */);
f4ed767f
GM
3233 open_paren_in_column_0_is_defun_start = 1;
3234
8489eb67
RS
3235 defsubr (&Ssyntax_table_p);
3236 defsubr (&Ssyntax_table);
3237 defsubr (&Sstandard_syntax_table);
3238 defsubr (&Scopy_syntax_table);
3239 defsubr (&Sset_syntax_table);
3240 defsubr (&Schar_syntax);
beefa22e 3241 defsubr (&Smatching_paren);
c65adb44 3242 defsubr (&Sstring_to_syntax);
8489eb67 3243 defsubr (&Smodify_syntax_entry);
62abe9cb 3244 defsubr (&Sinternal_describe_syntax_value);
8489eb67
RS
3245
3246 defsubr (&Sforward_word);
3247
195d1361
RS
3248 defsubr (&Sskip_chars_forward);
3249 defsubr (&Sskip_chars_backward);
3250 defsubr (&Sskip_syntax_forward);
3251 defsubr (&Sskip_syntax_backward);
3252
b3cfe0c8 3253 defsubr (&Sforward_comment);
8489eb67
RS
3254 defsubr (&Sscan_lists);
3255 defsubr (&Sscan_sexps);
3256 defsubr (&Sbackward_prefix_chars);
3257 defsubr (&Sparse_partial_sexp);
3258}
ab5796a9
MB
3259
3260/* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3261 (do not change this comment) */