*** empty log message ***
[bpt/emacs.git] / src / syntax.c
CommitLineData
8489eb67 1/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
b8855607 2 Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
8489eb67
RS
3
4This file is part of GNU Emacs.
5
6GNU Emacs is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
a4275ad1 8the Free Software Foundation; either version 2, or (at your option)
8489eb67
RS
9any later version.
10
11GNU Emacs is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
8489eb67
RS
20
21
18160b98 22#include <config.h>
8489eb67
RS
23#include <ctype.h>
24#include "lisp.h"
25#include "commands.h"
26#include "buffer.h"
93da5fff 27#include "charset.h"
195d1361
RS
28
29/* Make syntax table lookup grant data in gl_state. */
30#define SYNTAX_ENTRY_VIA_PROPERTY
31
8489eb67 32#include "syntax.h"
195d1361
RS
33#include "intervals.h"
34
35/* We use these constants in place for comment-style and
36 string-ender-char to distinguish comments/strings started by
37 comment_fence and string_fence codes. */
38
39#define ST_COMMENT_STYLE (256 + 1)
40#define ST_STRING_STYLE (256 + 2)
93da5fff 41#include "category.h"
8489eb67 42
7bf5e9e4 43Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
8489eb67
RS
44
45int words_include_escapes;
195d1361 46int parse_sexp_lookup_properties;
8489eb67 47
bd25db08
KH
48/* Nonzero means `scan-sexps' treat all multibyte characters as symbol. */
49int multibyte_syntax_as_symbol;
50
8ea151b2
RS
51/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
52 if not compiled with GCC. No need to mark it, since it is used
53 only very temporarily. */
54Lisp_Object syntax_temp;
55
f4ed767f
GM
56/* Non-zero means an open parenthesis in column 0 is always considered
57 to be the start of a defun. Zero means an open parenthesis in
58 column 0 has no special meaning. */
59
60int open_paren_in_column_0_is_defun_start;
61
e5d4f4dc
RS
62/* This is the internal form of the parse state used in parse-partial-sexp. */
63
64struct lisp_parse_state
65 {
195d1361
RS
66 int depth; /* Depth at end of parsing. */
67 int instring; /* -1 if not within string, else desired terminator. */
95ff8dfc 68 int incomment; /* -1 if in unnestable comment else comment nesting */
195d1361 69 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
e5d4f4dc
RS
70 int quoted; /* Nonzero if just after an escape char at end of parsing */
71 int thislevelstart; /* Char number of most recent start-of-expression at current level */
72 int prevlevelstart; /* Char number of start of containing expression */
195d1361 73 int location; /* Char number at which parsing stopped. */
e5d4f4dc 74 int mindepth; /* Minimum depth seen while scanning. */
195d1361 75 int comstr_start; /* Position just after last comment/string starter. */
1a102a58
RS
76 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
77 of levels (starting from outermost). */
e5d4f4dc
RS
78 };
79\f
37bef230
RS
80/* These variables are a cache for finding the start of a defun.
81 find_start_pos is the place for which the defun start was found.
82 find_start_value is the defun start position found for it.
6a140a74 83 find_start_value_byte is the corresponding byte position.
37bef230
RS
84 find_start_buffer is the buffer it was found in.
85 find_start_begv is the BEGV value when it was found.
86 find_start_modiff is the value of MODIFF when it was found. */
87
88static int find_start_pos;
89static int find_start_value;
6a140a74 90static int find_start_value_byte;
37bef230
RS
91static struct buffer *find_start_buffer;
92static int find_start_begv;
93static int find_start_modiff;
6a140a74
RS
94
95
96static int find_defun_start P_ ((int, int));
95ff8dfc 97static int back_comment P_ ((int, int, int, int, int, int *, int *));
6a140a74
RS
98static int char_quoted P_ ((int, int));
99static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
100static Lisp_Object scan_lists P_ ((int, int, int, int));
101static void scan_sexps_forward P_ ((struct lisp_parse_state *,
102 int, int, int, int,
103 int, Lisp_Object, int));
195d1361
RS
104\f
105
106struct gl_state_s gl_state; /* Global state of syntax parser. */
107
108INTERVAL interval_of ();
109#define INTERVALS_AT_ONCE 10 /* 1 + max-number of intervals
110 to scan to property-change. */
111
6a140a74
RS
112/* Update gl_state to an appropriate interval which contains CHARPOS. The
113 sign of COUNT give the relative position of CHARPOS wrt the previously
195d1361 114 valid interval. If INIT, only [be]_property fields of gl_state are
6a140a74 115 valid at start, the rest is filled basing on OBJECT.
195d1361 116
6a140a74 117 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
195d1361
RS
118 direction than the intervals - or in an interval. We update the
119 current syntax-table basing on the property of this interval, and
6a140a74 120 update the interval to start further than CHARPOS - or be
195d1361 121 NULL_INTERVAL. We also update lim_property to be the next value of
6a140a74 122 charpos to call this subroutine again - or be before/after the
195d1361
RS
123 start/end of OBJECT. */
124
125void
6a140a74
RS
126update_syntax_table (charpos, count, init, object)
127 int charpos, count, init;
195d1361
RS
128 Lisp_Object object;
129{
130 Lisp_Object tmp_table;
4ffe723b 131 int cnt = 0, invalidate = 1;
195d1361
RS
132 INTERVAL i, oldi;
133
134 if (init)
135 {
136 gl_state.start = gl_state.b_property;
137 gl_state.stop = gl_state.e_property;
6a140a74 138 gl_state.forward_i = interval_of (charpos, object);
195d1361
RS
139 i = gl_state.backward_i = gl_state.forward_i;
140 gl_state.left_ok = gl_state.right_ok = 1;
141 invalidate = 0;
142 if (NULL_INTERVAL_P (i))
143 return;
f902a008 144 /* interval_of updates only ->position of the return value, so
d80f4cc7
RS
145 update the parents manually to speed up update_interval. */
146 while (!NULL_PARENT (i))
147 {
148 if (AM_RIGHT_CHILD (i))
439d5cb4 149 INTERVAL_PARENT (i)->position = i->position
d80f4cc7 150 - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
439d5cb4
KR
151 - TOTAL_LENGTH (INTERVAL_PARENT (i))
152 + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
d80f4cc7 153 else
439d5cb4 154 INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
d80f4cc7 155 + TOTAL_LENGTH (i);
439d5cb4 156 i = INTERVAL_PARENT (i);
d80f4cc7
RS
157 }
158 i = gl_state.forward_i;
ee0cdb48
KH
159 gl_state.b_property = i->position - 1 - gl_state.offset;
160 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
161 goto update;
162 }
163 oldi = i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
164
6a140a74
RS
165 /* We are guarantied to be called with CHARPOS either in i,
166 or further off. */
195d1361
RS
167 if (NULL_INTERVAL_P (i))
168 error ("Error in syntax_table logic for to-the-end intervals");
6a140a74 169 else if (charpos < i->position) /* Move left. */
195d1361
RS
170 {
171 if (count > 0)
f902a008 172 error ("Error in syntax_table logic for intervals <-");
195d1361 173 /* Update the interval. */
6a140a74 174 i = update_interval (i, charpos);
195d1361
RS
175 if (oldi->position != INTERVAL_LAST_POS (i))
176 {
177 invalidate = 0;
178 gl_state.right_ok = 1; /* Invalidate the other end. */
179 gl_state.forward_i = i;
ee0cdb48 180 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
181 }
182 }
6a140a74 183 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
195d1361
RS
184 {
185 if (count < 0)
f902a008 186 error ("Error in syntax_table logic for intervals ->");
195d1361 187 /* Update the interval. */
6a140a74 188 i = update_interval (i, charpos);
195d1361
RS
189 if (i->position != INTERVAL_LAST_POS (oldi))
190 {
191 invalidate = 0;
192 gl_state.left_ok = 1; /* Invalidate the other end. */
193 gl_state.backward_i = i;
ee0cdb48 194 gl_state.b_property = i->position - 1 - gl_state.offset;
195d1361
RS
195 }
196 }
197 else if (count > 0 ? gl_state.right_ok : gl_state.left_ok)
198 {
199 /* We do not need to recalculate tmp_table. */
200 tmp_table = gl_state.old_prop;
201 }
202
203 update:
204 tmp_table = textget (i->plist, Qsyntax_table);
205
206 if (invalidate)
207 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
208
209 if (invalidate) /* Did not get to adjacent interval. */
210 { /* with the same table => */
211 /* invalidate the old range. */
212 if (count > 0)
213 {
214 gl_state.backward_i = i;
215 gl_state.left_ok = 1; /* Invalidate the other end. */
ee0cdb48 216 gl_state.b_property = i->position - 1 - gl_state.offset;
195d1361
RS
217 }
218 else
219 {
220 gl_state.forward_i = i;
221 gl_state.right_ok = 1; /* Invalidate the other end. */
ee0cdb48 222 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
195d1361
RS
223 }
224 }
37bef230 225
195d1361
RS
226 gl_state.current_syntax_table = tmp_table;
227 gl_state.old_prop = tmp_table;
2e34157c 228 if (EQ (Fsyntax_table_p (tmp_table), Qt))
195d1361
RS
229 {
230 gl_state.use_global = 0;
231 }
232 else if (CONSP (tmp_table))
233 {
234 gl_state.use_global = 1;
235 gl_state.global_code = tmp_table;
236 }
237 else
238 {
239 gl_state.use_global = 0;
240 gl_state.current_syntax_table = current_buffer->syntax_table;
241 }
242
243 while (!NULL_INTERVAL_P (i))
244 {
245 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
246 {
247 if (count > 0)
248 gl_state.right_ok = 0;
249 else
250 gl_state.left_ok = 0;
251 break;
252 }
253 else if (cnt == INTERVALS_AT_ONCE)
254 {
255 if (count > 0)
256 gl_state.right_ok = 1;
257 else
258 gl_state.left_ok = 1;
259 break;
260 }
261 cnt++;
262 i = count > 0 ? next_interval (i) : previous_interval (i);
263 }
264 if (NULL_INTERVAL_P (i))
265 { /* This property goes to the end. */
266 if (count > 0)
267 gl_state.e_property = gl_state.stop;
268 else
269 gl_state.b_property = gl_state.start;
270 }
271 else
272 {
273 if (count > 0)
274 {
ee0cdb48 275 gl_state.e_property = i->position - gl_state.offset;
195d1361
RS
276 gl_state.forward_i = i;
277 }
278 else
279 {
ee0cdb48 280 gl_state.b_property = i->position + LENGTH (i) - 1 - gl_state.offset;
195d1361
RS
281 gl_state.backward_i = i;
282 }
283 }
284}
285\f
6a140a74
RS
286/* Returns TRUE if char at CHARPOS is quoted.
287 Global syntax-table data should be set up already to be good at CHARPOS
288 or after. On return global syntax data is good for lookup at CHARPOS. */
195d1361
RS
289
290static int
6a140a74
RS
291char_quoted (charpos, bytepos)
292 register int charpos, bytepos;
195d1361
RS
293{
294 register enum syntaxcode code;
295 register int beg = BEGV;
296 register int quoted = 0;
6a140a74
RS
297 int orig = charpos;
298
299 DEC_BOTH (charpos, bytepos);
195d1361 300
6a140a74 301 while (bytepos >= beg)
195d1361 302 {
6a140a74
RS
303 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
304 code = SYNTAX (FETCH_CHAR (bytepos));
305 if (! (code == Scharquote || code == Sescape))
306 break;
307
308 DEC_BOTH (charpos, bytepos);
309 quoted = !quoted;
195d1361 310 }
6a140a74
RS
311
312 UPDATE_SYNTAX_TABLE (orig);
195d1361
RS
313 return quoted;
314}
6a140a74
RS
315
316/* Return the bytepos one character after BYTEPOS.
317 We assume that BYTEPOS is not at the end of the buffer. */
318
319INLINE int
320inc_bytepos (bytepos)
321 int bytepos;
322{
ef316cf0
RS
323 if (NILP (current_buffer->enable_multibyte_characters))
324 return bytepos + 1;
325
6a140a74
RS
326 INC_POS (bytepos);
327 return bytepos;
328}
329
330/* Return the bytepos one character before BYTEPOS.
331 We assume that BYTEPOS is not at the start of the buffer. */
332
333INLINE int
334dec_bytepos (bytepos)
335 int bytepos;
336{
ef316cf0
RS
337 if (NILP (current_buffer->enable_multibyte_characters))
338 return bytepos - 1;
339
6a140a74
RS
340 DEC_POS (bytepos);
341 return bytepos;
342}
195d1361 343\f
37bef230
RS
344/* Find a defun-start that is the last one before POS (or nearly the last).
345 We record what we find, so that another call in the same area
195d1361
RS
346 can return the same value right away.
347
348 There is no promise at which position the global syntax data is
349 valid on return from the subroutine, so the caller should explicitly
350 update the global data. */
37bef230
RS
351
352static int
6a140a74
RS
353find_defun_start (pos, pos_byte)
354 int pos, pos_byte;
37bef230 355{
6a140a74 356 int opoint = PT, opoint_byte = PT_BYTE;
37bef230
RS
357
358 /* Use previous finding, if it's valid and applies to this inquiry. */
359 if (current_buffer == find_start_buffer
360 /* Reuse the defun-start even if POS is a little farther on.
361 POS might be in the next defun, but that's ok.
362 Our value may not be the best possible, but will still be usable. */
363 && pos <= find_start_pos + 1000
364 && pos >= find_start_value
365 && BEGV == find_start_begv
366 && MODIFF == find_start_modiff)
367 return find_start_value;
368
369 /* Back up to start of line. */
6a140a74 370 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
37bef230 371
195d1361
RS
372 /* We optimize syntax-table lookup for rare updates. Thus we accept
373 only those `^\s(' which are good in global _and_ text-property
374 syntax-tables. */
375 gl_state.current_syntax_table = current_buffer->syntax_table;
376 gl_state.use_global = 0;
4467eb94 377 if (open_paren_in_column_0_is_defun_start)
37bef230 378 {
4467eb94 379 while (PT > BEGV)
195d1361 380 {
4467eb94
GM
381 /* Open-paren at start of line means we may have found our
382 defun-start. */
383 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
384 {
385 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
386 if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
387 break;
388 /* Now fallback to the default value. */
389 gl_state.current_syntax_table = current_buffer->syntax_table;
390 gl_state.use_global = 0;
391 }
392 /* Move to beg of previous line. */
393 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
195d1361 394 }
37bef230
RS
395 }
396
397 /* Record what we found, for the next try. */
6a140a74
RS
398 find_start_value = PT;
399 find_start_value_byte = PT_BYTE;
37bef230
RS
400 find_start_buffer = current_buffer;
401 find_start_modiff = MODIFF;
402 find_start_begv = BEGV;
403 find_start_pos = pos;
404
6a140a74
RS
405 TEMP_SET_PT_BOTH (opoint, opoint_byte);
406
37bef230
RS
407 return find_start_value;
408}
409\f
f902a008
RS
410/* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
411
412static int
413prev_char_comend_first (pos, pos_byte)
414 int pos, pos_byte;
415{
416 int c, val;
417
418 DEC_BOTH (pos, pos_byte);
419 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
420 c = FETCH_CHAR (pos_byte);
421 val = SYNTAX_COMEND_FIRST (c);
422 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
423 return val;
424}
425
426/* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE. */
427
428static int
429prev_char_comstart_first (pos, pos_byte)
430 int pos, pos_byte;
431{
432 int c, val;
433
434 DEC_BOTH (pos, pos_byte);
435 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
436 c = FETCH_CHAR (pos_byte);
437 val = SYNTAX_COMSTART_FIRST (c);
438 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
439 return val;
440}
441
6a140a74
RS
442/* Checks whether charpos FROM is at the end of a comment.
443 FROM_BYTE is the bytepos corresponding to FROM.
444 Do not move back before STOP.
445
446 Return a positive value if we find a comment ending at FROM/FROM_BYTE;
447 return -1 otherwise.
448
449 If successful, store the charpos of the comment's beginning
450 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
527a32d9
KH
451
452 Global syntax data remains valid for backward search starting at
453 the returned value (or at FROM, if the search was not successful). */
195d1361
RS
454
455static int
95ff8dfc 456back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
6a140a74 457 int from, from_byte, stop;
95ff8dfc 458 int comnested, comstyle;
6a140a74 459 int *charpos_ptr, *bytepos_ptr;
195d1361
RS
460{
461 /* Look back, counting the parity of string-quotes,
462 and recording the comment-starters seen.
463 When we reach a safe place, assume that's not in a string;
464 then step the main scan to the earliest comment-starter seen
465 an even number of string quotes away from the safe place.
466
467 OFROM[I] is position of the earliest comment-starter seen
468 which is I+2X quotes from the comment-end.
469 PARITY is current parity of quotes from the comment end. */
3ee5041c 470 int string_style = -1; /* Presumed outside of any string. */
195d1361 471 int string_lossage = 0;
f7c436c1
SM
472 /* Not a real lossage: indicates that we have passed a matching comment
473 starter plus an non-matching comment-ender, meaning that any matching
474 comment-starter we might see later could be a false positive (hidden
475 inside another comment).
476 Test case: { a (* b } c (* d *) */
477 int comment_lossage = 0;
195d1361 478 int comment_end = from;
6a140a74 479 int comment_end_byte = from_byte;
195d1361 480 int comstart_pos = 0;
6a140a74 481 int comstart_byte;
195d1361 482 int scanstart = from - 1;
eb35b628
RS
483 /* Place where the containing defun starts,
484 or 0 if we didn't come across it yet. */
485 int defun_start = 0;
486 int defun_start_byte = 0;
195d1361 487 register enum syntaxcode code;
95ff8dfc 488 int nesting = 1; /* current comment nesting */
ea315ed6 489 int c;
195d1361
RS
490
491 /* At beginning of range to scan, we're outside of strings;
492 that determines quote parity to the comment-end. */
493 while (from != stop)
494 {
4ffe723b 495 int temp_byte;
6a140a74 496
195d1361 497 /* Move back and examine a character. */
6a140a74 498 DEC_BOTH (from, from_byte);
195d1361
RS
499 UPDATE_SYNTAX_TABLE_BACKWARD (from);
500
6a140a74 501 c = FETCH_CHAR (from_byte);
195d1361
RS
502 code = SYNTAX (c);
503
527a32d9 504 /* If this char is the second of a 2-char comment end sequence,
195d1361
RS
505 back up and give the pair the appropriate syntax. */
506 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 507 && prev_char_comend_first (from, from_byte))
195d1361
RS
508 {
509 code = Sendcomment;
6a140a74 510 DEC_BOTH (from, from_byte);
195d1361 511 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 512 c = FETCH_CHAR (from_byte);
195d1361
RS
513 }
514
515 /* If this char starts a 2-char comment start sequence,
516 treat it like a 1-char comment starter. */
f902a008
RS
517 if (from < scanstart && SYNTAX_COMSTART_FIRST (c))
518 {
519 temp_byte = inc_bytepos (from_byte);
520 UPDATE_SYNTAX_TABLE_FORWARD (from + 1);
521 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (temp_byte))
522 && comstyle == SYNTAX_COMMENT_STYLE (FETCH_CHAR (temp_byte)))
523 code = Scomment;
524 UPDATE_SYNTAX_TABLE_BACKWARD (from);
525 }
1aa963c8
SM
526 else if (code == Scomment && comstyle != SYNTAX_COMMENT_STYLE (c))
527 /* Ignore comment starters of a different style. */
528 continue;
195d1361 529
9828a477
KH
530 /* Ignore escaped characters, except comment-enders. */
531 if (code != Sendcomment && char_quoted (from, from_byte))
195d1361
RS
532 continue;
533
3ee5041c 534 switch (code)
195d1361 535 {
3ee5041c
SM
536 case Sstring_fence:
537 case Scomment_fence:
538 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
539 case Sstring:
540 /* Track parity of quotes. */
541 if (string_style == -1)
542 /* Entering a string. */
543 string_style = c;
544 else if (string_style == c)
545 /* Leaving the string. */
546 string_style = -1;
547 else
548 /* If we have two kinds of string delimiters.
549 There's no way to grok this scanning backwards. */
195d1361 550 string_lossage = 1;
3ee5041c
SM
551 break;
552
553 case Scomment:
554 /* We've already checked that it is the relevant comstyle. */
f7c436c1 555 if (string_style != -1 || comment_lossage || string_lossage)
3ee5041c
SM
556 /* There are odd string quotes involved, so let's be careful.
557 Test case in Pascal: " { " a { " } */
558 goto lossage;
559
f7c436c1
SM
560 if (!comnested)
561 {
562 /* Record best comment-starter so far. */
563 comstart_pos = from;
564 comstart_byte = from_byte;
565 }
566 else if (--nesting <= 0)
95ff8dfc
RS
567 /* nested comments have to be balanced, so we don't need to
568 keep looking for earlier ones. We use here the same (slightly
569 incorrect) reasoning as below: since it is followed by uniform
570 paired string quotes, this comment-start has to be outside of
571 strings, else the comment-end itself would be inside a string. */
572 goto done;
3ee5041c
SM
573 break;
574
02010917
SM
575 case Sendcomment:
576 if (SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
577 /* This is the same style of comment ender as ours. */
578 {
579 if (comnested)
580 nesting++;
581 else
582 /* Anything before that can't count because it would match
583 this comment-ender rather than ours. */
584 from = stop; /* Break out of the loop. */
585 }
f7c436c1
SM
586 else if (comstart_pos != 0 || c != '\n')
587 /* We're mixing comment styles here, so we'd better be careful.
588 The (comstart_pos != 0 || c != '\n') check is not quite correct
589 (we should just always set comment_lossage), but removing it
590 would imply that any multiline comment in C would go through
591 lossage, which seems overkill.
592 The failure should only happen in the rare cases such as
593 { (* } *) */
594 comment_lossage = 1;
02010917 595 break;
195d1361 596
02010917
SM
597 case Sopen:
598 /* Assume a defun-start point is outside of strings. */
599 if (open_paren_in_column_0_is_defun_start
600 && (from == stop
601 || (temp_byte = dec_bytepos (from_byte),
602 FETCH_CHAR (temp_byte) == '\n')))
603 {
604 defun_start = from;
605 defun_start_byte = from_byte;
606 from = stop; /* Break out of the loop. */
607 }
eb35b628 608 break;
02010917
SM
609
610 default:
f7c436c1 611 break;
eb35b628 612 }
195d1361
RS
613 }
614
615 if (comstart_pos == 0)
616 {
617 from = comment_end;
6a140a74 618 from_byte = comment_end_byte;
195d1361
RS
619 UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
620 }
f7c436c1
SM
621 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
622 or `done'), then we've found the beginning of the non-nested comment. */
623 else if (1) /* !comnested */
195d1361
RS
624 {
625 from = comstart_pos;
6a140a74 626 from_byte = comstart_byte;
195d1361
RS
627 /* Globals are correct now. */
628 }
629 else
630 {
3ee5041c
SM
631 struct lisp_parse_state state;
632 lossage:
195d1361
RS
633 /* We had two kinds of string delimiters mixed up
634 together. Decode this going forwards.
02010917 635 Scan fwd from a known safe place (beginning-of-defun)
195d1361
RS
636 to the one in question; this records where we
637 last passed a comment starter. */
eb35b628
RS
638 /* If we did not already find the defun start, find it now. */
639 if (defun_start == 0)
640 {
641 defun_start = find_defun_start (comment_end, comment_end_byte);
642 defun_start_byte = find_start_value_byte;
643 }
02010917 644 do
195d1361 645 {
02010917
SM
646 scan_sexps_forward (&state,
647 defun_start, defun_start_byte,
648 comment_end, -10000, 0, Qnil, 0);
649 defun_start = comment_end;
650 if (state.incomment == (comnested ? 1 : -1)
651 && state.comstyle == comstyle)
652 from = state.comstr_start;
653 else
654 {
655 from = comment_end;
656 if (state.incomment)
657 /* If comment_end is inside some other comment, maybe ours
658 is nested, so we need to try again from within the
659 surrounding comment. Example: { a (* " *) */
660 {
661 /* FIXME: We should advance by one or two chars. */
662 defun_start = state.comstr_start + 2;
663 defun_start_byte = CHAR_TO_BYTE (defun_start);
664 }
665 }
666 } while (defun_start < comment_end);
667
6a140a74 668 from_byte = CHAR_TO_BYTE (from);
195d1361
RS
669 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
670 }
671
95ff8dfc 672 done:
6a140a74
RS
673 *charpos_ptr = from;
674 *bytepos_ptr = from_byte;
675
1aa963c8 676 return (from == comment_end) ? -1 : from;
195d1361
RS
677}
678\f
8489eb67 679DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
2203e1e8 680 "Return t if OBJECT is a syntax table.\n\
8ea151b2 681Currently, any char-table counts as a syntax table.")
2203e1e8
EN
682 (object)
683 Lisp_Object object;
8489eb67 684{
2203e1e8 685 if (CHAR_TABLE_P (object)
e704cb4b 686 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
8489eb67
RS
687 return Qt;
688 return Qnil;
689}
690
8ea151b2 691static void
8489eb67
RS
692check_syntax_table (obj)
693 Lisp_Object obj;
694{
d1be9f0f 695 if (!(CHAR_TABLE_P (obj)
e704cb4b 696 && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table)))
d1be9f0f 697 wrong_type_argument (Qsyntax_table_p, obj);
8489eb67
RS
698}
699
8489eb67
RS
700DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
701 "Return the current syntax table.\n\
702This is the one specified by the current buffer.")
703 ()
704{
705 return current_buffer->syntax_table;
706}
707
708DEFUN ("standard-syntax-table", Fstandard_syntax_table,
709 Sstandard_syntax_table, 0, 0, 0,
710 "Return the standard syntax table.\n\
711This is the one used for new buffers.")
712 ()
713{
714 return Vstandard_syntax_table;
715}
716
717DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
718 "Construct a new syntax table and return it.\n\
719It is a copy of the TABLE, which defaults to the standard syntax table.")
720 (table)
721 Lisp_Object table;
722{
8ea151b2
RS
723 Lisp_Object copy;
724
265a9e55 725 if (!NILP (table))
8ea151b2
RS
726 check_syntax_table (table);
727 else
728 table = Vstandard_syntax_table;
729
730 copy = Fcopy_sequence (table);
0f867324
RS
731
732 /* Only the standard syntax table should have a default element.
733 Other syntax tables should inherit from parents instead. */
734 XCHAR_TABLE (copy)->defalt = Qnil;
735
736 /* Copied syntax tables should all have parents.
737 If we copied one with no parent, such as the standard syntax table,
738 use the standard syntax table as the copy's parent. */
739 if (NILP (XCHAR_TABLE (copy)->parent))
740 Fset_char_table_parent (copy, Vstandard_syntax_table);
8ea151b2 741 return copy;
8489eb67
RS
742}
743
744DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
745 "Select a new syntax table for the current buffer.\n\
746One argument, a syntax table.")
747 (table)
748 Lisp_Object table;
749{
4e4dfa78 750 int idx;
8ea151b2 751 check_syntax_table (table);
8489eb67
RS
752 current_buffer->syntax_table = table;
753 /* Indicate that this buffer now has a specified syntax table. */
f6cd0527
GM
754 idx = PER_BUFFER_VAR_IDX (syntax_table);
755 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
8489eb67
RS
756 return table;
757}
758\f
759/* Convert a letter which signifies a syntax code
760 into the code it signifies.
195d1361 761 This is used by modify-syntax-entry, and other things. */
8489eb67
RS
762
763unsigned char syntax_spec_code[0400] =
764 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
765 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
766 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
767 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
195d1361 768 (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
8489eb67
RS
769 (char) Smath, 0377, 0377, (char) Squote,
770 (char) Sopen, (char) Sclose, 0377, 0377,
771 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
772 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
773 0377, 0377, 0377, 0377,
774 (char) Scomment, 0377, (char) Sendcomment, 0377,
6cb71bf6 775 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
8489eb67
RS
776 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
777 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
778 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
779 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
780 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
781 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
195d1361 782 0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
8489eb67
RS
783 };
784
195d1361 785/* Indexed by syntax code, give the letter that describes it. */
8489eb67 786
195d1361 787char syntax_code_spec[16] =
8489eb67 788 {
195d1361
RS
789 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
790 '!', '|'
8489eb67 791 };
93da5fff
KH
792
793/* Indexed by syntax code, give the object (cons of syntax code and
794 nil) to be stored in syntax table. Since these objects can be
795 shared among syntax tables, we generate them in advance. By
796 sharing objects, the function `describe-syntax' can give a more
797 compact listing. */
798static Lisp_Object Vsyntax_code_object;
799
8489eb67 800\f
8ea151b2
RS
801/* Look up the value for CHARACTER in syntax table TABLE's parent
802 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
803 for CHARACTER. It's actually used only when not compiled with GCC. */
804
805Lisp_Object
806syntax_parent_lookup (table, character)
807 Lisp_Object table;
808 int character;
809{
810 Lisp_Object value;
811
812 while (1)
813 {
814 table = XCHAR_TABLE (table)->parent;
815 if (NILP (table))
816 return Qnil;
817
818 value = XCHAR_TABLE (table)->contents[character];
819 if (!NILP (value))
820 return value;
821 }
822}
823
8489eb67 824DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
2203e1e8
EN
825 "Return the syntax code of CHARACTER, described by a character.\n\
826For example, if CHARACTER is a word constituent,\n\
827the character `w' is returned.\n\
8489eb67
RS
828The characters that correspond to various syntax codes\n\
829are listed in the documentation of `modify-syntax-entry'.")
2203e1e8
EN
830 (character)
831 Lisp_Object character;
8489eb67 832{
8ea151b2 833 int char_int;
195d1361
RS
834 gl_state.current_syntax_table = current_buffer->syntax_table;
835
836 gl_state.use_global = 0;
2203e1e8
EN
837 CHECK_NUMBER (character, 0);
838 char_int = XINT (character);
8ea151b2 839 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
beefa22e
RS
840}
841
842DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
2203e1e8
EN
843 "Return the matching parenthesis of CHARACTER, or nil if none.")
844 (character)
845 Lisp_Object character;
beefa22e 846{
8ea151b2 847 int char_int, code;
195d1361
RS
848 gl_state.current_syntax_table = current_buffer->syntax_table;
849 gl_state.use_global = 0;
2203e1e8
EN
850 CHECK_NUMBER (character, 0);
851 char_int = XINT (character);
8ea151b2 852 code = SYNTAX (char_int);
a8bd7cd8 853 if (code == Sopen || code == Sclose)
2e34157c 854 return SYNTAX_MATCH (char_int);
beefa22e 855 return Qnil;
8489eb67
RS
856}
857
858/* This comment supplies the doc string for modify-syntax-entry,
859 for make-docfile to see. We cannot put this in the real DEFUN
860 due to limits in the Unix cpp.
861
2fcea188 862DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
8489eb67
RS
863 "Set syntax for character CHAR according to string S.\n\
864The syntax is changed only for table TABLE, which defaults to\n\
865 the current buffer's syntax table.\n\
866The first character of S should be one of the following:\n\
32676c08
JB
867 Space or - whitespace syntax. w word constituent.\n\
868 _ symbol constituent. . punctuation.\n\
869 ( open-parenthesis. ) close-parenthesis.\n\
870 \" string quote. \\ escape.\n\
871 $ paired delimiter. ' expression quote or prefix operator.\n\
abae13bd 872 < comment starter. > comment ender.\n\
6cb71bf6 873 / character-quote. @ inherit from `standard-syntax-table'.\n\
abae13bd 874\n\
8489eb67
RS
875Only single-character comment start and end sequences are represented thus.\n\
876Two-character sequences are represented as described below.\n\
877The second character of S is the matching parenthesis,\n\
878 used only if the first character is `(' or `)'.\n\
879Any additional characters are flags.\n\
95ff8dfc 880Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
2203e1e8
EN
881 1 means CHAR is the start of a two-char comment start sequence.\n\
882 2 means CHAR is the second character of such a sequence.\n\
883 3 means CHAR is the start of a two-char comment end sequence.\n\
884 4 means CHAR is the second character of such a sequence.\n\
e5d4f4dc 885\n\
195d1361 886There can be up to two orthogonal comment sequences. This is to support\n\
e5d4f4dc 887language modes such as C++. By default, all comment sequences are of style\n\
abae13bd
RS
888a, but you can set the comment sequence style to b (on the second character\n\
889of a comment-start, or the first character of a comment-end sequence) using\n\
e5d4f4dc 890this flag:\n\
2203e1e8 891 b means CHAR is part of comment sequence b.\n\
95ff8dfc 892 n means CHAR is part of a nestable comment sequence.\n\
e5d4f4dc 893\n\
2203e1e8 894 p means CHAR is a prefix character for `backward-prefix-chars';\n\
e5d4f4dc 895 such characters are treated as whitespace when they occur\n\
8489eb67 896 between expressions.")
2fcea188 897 (char, s, table)
8489eb67
RS
898*/
899
900DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
901 /* I really don't know why this is interactive
902 help-form should at least be made useful whilst reading the second arg
903 */
904 "cSet syntax for character: \nsSet syntax for %s to: ",
905 0 /* See immediately above */)
906 (c, newentry, syntax_table)
907 Lisp_Object c, newentry, syntax_table;
908{
8ea151b2 909 register unsigned char *p;
8489eb67 910 register enum syntaxcode code;
247e20a8 911 int val;
8ea151b2 912 Lisp_Object match;
8489eb67
RS
913
914 CHECK_NUMBER (c, 0);
915 CHECK_STRING (newentry, 1);
8ea151b2 916
265a9e55 917 if (NILP (syntax_table))
8489eb67
RS
918 syntax_table = current_buffer->syntax_table;
919 else
8ea151b2 920 check_syntax_table (syntax_table);
8489eb67
RS
921
922 p = XSTRING (newentry)->data;
923 code = (enum syntaxcode) syntax_spec_code[*p++];
924 if (((int) code & 0377) == 0377)
2e34157c 925 error ("invalid syntax description letter: %c", p[-1]);
8489eb67 926
8ea151b2
RS
927 if (code == Sinherit)
928 {
e704cb4b 929 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), Qnil);
8ea151b2
RS
930 return Qnil;
931 }
932
933 if (*p)
d1be9f0f 934 {
93da5fff 935 int len;
7c19e8e1
KH
936 int character = (STRING_CHAR_AND_LENGTH
937 (p, STRING_BYTES (XSTRING (newentry)) - 1, len));
93da5fff 938 XSETINT (match, character);
d1be9f0f
RS
939 if (XFASTINT (match) == ' ')
940 match = Qnil;
93da5fff 941 p += len;
d1be9f0f
RS
942 }
943 else
8ea151b2 944 match = Qnil;
8489eb67 945
8ea151b2 946 val = (int) code;
8489eb67
RS
947 while (*p)
948 switch (*p++)
949 {
950 case '1':
247e20a8 951 val |= 1 << 16;
8489eb67
RS
952 break;
953
954 case '2':
247e20a8 955 val |= 1 << 17;
8489eb67
RS
956 break;
957
958 case '3':
247e20a8 959 val |= 1 << 18;
8489eb67
RS
960 break;
961
962 case '4':
247e20a8 963 val |= 1 << 19;
8489eb67
RS
964 break;
965
966 case 'p':
247e20a8 967 val |= 1 << 20;
8489eb67 968 break;
e5d4f4dc
RS
969
970 case 'b':
247e20a8 971 val |= 1 << 21;
e5d4f4dc 972 break;
95ff8dfc
RS
973
974 case 'n':
975 val |= 1 << 22;
976 break;
8489eb67
RS
977 }
978
93da5fff
KH
979 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
980 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
981 else
982 /* Since we can't use a shared object, let's make a new one. */
983 newentry = Fcons (make_number (val), match);
984
e704cb4b 985 SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
8489eb67
RS
986
987 return Qnil;
988}
989\f
990/* Dump syntax table to buffer in human-readable format */
991
340f92b5 992static void
8489eb67
RS
993describe_syntax (value)
994 Lisp_Object value;
995{
996 register enum syntaxcode code;
e6365855 997 char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
8489eb67 998 char str[2];
8ea151b2 999 Lisp_Object first, match_lisp;
8489eb67
RS
1000
1001 Findent_to (make_number (16), make_number (1));
1002
8ea151b2
RS
1003 if (NILP (value))
1004 {
93da5fff 1005 insert_string ("default\n");
8ea151b2
RS
1006 return;
1007 }
1008
908b7fea
KH
1009 if (CHAR_TABLE_P (value))
1010 {
1011 insert_string ("deeper char-table ...\n");
1012 return;
1013 }
1014
8ea151b2
RS
1015 if (!CONSP (value))
1016 {
93da5fff 1017 insert_string ("invalid\n");
8ea151b2
RS
1018 return;
1019 }
1020
c1d497be
KR
1021 first = XCAR (value);
1022 match_lisp = XCDR (value);
8ea151b2
RS
1023
1024 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
8489eb67 1025 {
93da5fff 1026 insert_string ("invalid\n");
8489eb67
RS
1027 return;
1028 }
1029
e704cb4b 1030 code = (enum syntaxcode) (XINT (first) & 0377);
8ea151b2
RS
1031 start1 = (XINT (first) >> 16) & 1;
1032 start2 = (XINT (first) >> 17) & 1;
1033 end1 = (XINT (first) >> 18) & 1;
1034 end2 = (XINT (first) >> 19) & 1;
1035 prefix = (XINT (first) >> 20) & 1;
1036 comstyle = (XINT (first) >> 21) & 1;
e6365855 1037 comnested = (XINT (first) >> 22) & 1;
8489eb67
RS
1038
1039 if ((int) code < 0 || (int) code >= (int) Smax)
1040 {
1041 insert_string ("invalid");
1042 return;
1043 }
1044 desc = syntax_code_spec[(int) code];
1045
1046 str[0] = desc, str[1] = 0;
1047 insert (str, 1);
1048
93da5fff
KH
1049 if (NILP (match_lisp))
1050 insert (" ", 1);
1051 else
1052 insert_char (XINT (match_lisp));
8489eb67 1053
8489eb67
RS
1054 if (start1)
1055 insert ("1", 1);
1056 if (start2)
1057 insert ("2", 1);
1058
1059 if (end1)
1060 insert ("3", 1);
1061 if (end2)
1062 insert ("4", 1);
1063
1064 if (prefix)
1065 insert ("p", 1);
e5d4f4dc
RS
1066 if (comstyle)
1067 insert ("b", 1);
e6365855
SM
1068 if (comnested)
1069 insert ("n", 1);
8489eb67
RS
1070
1071 insert_string ("\twhich means: ");
1072
0220c518 1073 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
1074 {
1075 case Swhitespace:
1076 insert_string ("whitespace"); break;
1077 case Spunct:
1078 insert_string ("punctuation"); break;
1079 case Sword:
1080 insert_string ("word"); break;
1081 case Ssymbol:
1082 insert_string ("symbol"); break;
1083 case Sopen:
1084 insert_string ("open"); break;
1085 case Sclose:
1086 insert_string ("close"); break;
1087 case Squote:
1088 insert_string ("quote"); break;
1089 case Sstring:
1090 insert_string ("string"); break;
1091 case Smath:
1092 insert_string ("math"); break;
1093 case Sescape:
1094 insert_string ("escape"); break;
1095 case Scharquote:
1096 insert_string ("charquote"); break;
1097 case Scomment:
1098 insert_string ("comment"); break;
1099 case Sendcomment:
1100 insert_string ("endcomment"); break;
1101 default:
1102 insert_string ("invalid");
1103 return;
1104 }
1105
8ea151b2 1106 if (!NILP (match_lisp))
8489eb67
RS
1107 {
1108 insert_string (", matches ");
8ea151b2 1109 insert_char (XINT (match_lisp));
8489eb67
RS
1110 }
1111
1112 if (start1)
1113 insert_string (",\n\t is the first character of a comment-start sequence");
1114 if (start2)
1115 insert_string (",\n\t is the second character of a comment-start sequence");
1116
1117 if (end1)
1118 insert_string (",\n\t is the first character of a comment-end sequence");
1119 if (end2)
1120 insert_string (",\n\t is the second character of a comment-end sequence");
e5d4f4dc
RS
1121 if (comstyle)
1122 insert_string (" (comment style b)");
e6365855
SM
1123 if (comnested)
1124 insert_string (" (nestable)");
e5d4f4dc 1125
8489eb67
RS
1126 if (prefix)
1127 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
1128
1129 insert_string ("\n");
1130}
1131
340f92b5 1132static Lisp_Object
8489eb67
RS
1133describe_syntax_1 (vector)
1134 Lisp_Object vector;
1135{
1136 struct buffer *old = current_buffer;
1137 set_buffer_internal (XBUFFER (Vstandard_output));
e41819d4 1138 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil, (int *) 0, 0);
93da5fff
KH
1139 while (! NILP (XCHAR_TABLE (vector)->parent))
1140 {
1141 vector = XCHAR_TABLE (vector)->parent;
1142 insert_string ("\nThe parent syntax table is:");
e41819d4
RS
1143 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil,
1144 (int *) 0, 0);
93da5fff
KH
1145 }
1146
36cd82fe 1147 call0 (intern ("help-mode"));
8489eb67
RS
1148 set_buffer_internal (old);
1149 return Qnil;
1150}
1151
1152DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
1153 "Describe the syntax specifications in the syntax table.\n\
1154The descriptions are inserted in a buffer, which is then displayed.")
1155 ()
1156{
1157 internal_with_output_to_temp_buffer
1158 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
1159
1160 return Qnil;
1161}
1162\f
195d1361
RS
1163int parse_sexp_ignore_comments;
1164
8489eb67
RS
1165/* Return the position across COUNT words from FROM.
1166 If that many words cannot be found before the end of the buffer, return 0.
1167 COUNT negative means scan backward and stop at word beginning. */
1168
dfcf069d 1169int
8489eb67
RS
1170scan_words (from, count)
1171 register int from, count;
1172{
1173 register int beg = BEGV;
1174 register int end = ZV;
6a140a74 1175 register int from_byte = CHAR_TO_BYTE (from);
93da5fff
KH
1176 register enum syntaxcode code;
1177 int ch0, ch1;
8489eb67
RS
1178
1179 immediate_quit = 1;
1180 QUIT;
1181
195d1361
RS
1182 SETUP_SYNTAX_TABLE (from, count);
1183
8489eb67
RS
1184 while (count > 0)
1185 {
1186 while (1)
1187 {
1188 if (from == end)
1189 {
1190 immediate_quit = 0;
1191 return 0;
1192 }
195d1361 1193 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1194 ch0 = FETCH_CHAR (from_byte);
93da5fff 1195 code = SYNTAX (ch0);
6a140a74 1196 INC_BOTH (from, from_byte);
8489eb67
RS
1197 if (words_include_escapes
1198 && (code == Sescape || code == Scharquote))
1199 break;
1200 if (code == Sword)
1201 break;
8489eb67 1202 }
93da5fff
KH
1203 /* Now CH0 is a character which begins a word and FROM is the
1204 position of the next character. */
8489eb67
RS
1205 while (1)
1206 {
1207 if (from == end) break;
195d1361 1208 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 1209 ch1 = FETCH_CHAR (from_byte);
93da5fff 1210 code = SYNTAX (ch1);
8489eb67
RS
1211 if (!(words_include_escapes
1212 && (code == Sescape || code == Scharquote)))
93da5fff 1213 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1214 break;
6a140a74 1215 INC_BOTH (from, from_byte);
93da5fff 1216 ch0 = ch1;
8489eb67
RS
1217 }
1218 count--;
1219 }
1220 while (count < 0)
1221 {
1222 while (1)
1223 {
1224 if (from == beg)
1225 {
1226 immediate_quit = 0;
1227 return 0;
1228 }
6a140a74 1229 DEC_BOTH (from, from_byte);
195d1361 1230 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1231 ch1 = FETCH_CHAR (from_byte);
93da5fff 1232 code = SYNTAX (ch1);
8489eb67
RS
1233 if (words_include_escapes
1234 && (code == Sescape || code == Scharquote))
1235 break;
1236 if (code == Sword)
1237 break;
8489eb67 1238 }
93da5fff
KH
1239 /* Now CH1 is a character which ends a word and FROM is the
1240 position of it. */
8489eb67
RS
1241 while (1)
1242 {
6a140a74
RS
1243 int temp_byte;
1244
1245 if (from == beg)
1246 break;
1247 temp_byte = dec_bytepos (from_byte);
195d1361 1248 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 1249 ch0 = FETCH_CHAR (temp_byte);
93da5fff 1250 code = SYNTAX (ch0);
8489eb67
RS
1251 if (!(words_include_escapes
1252 && (code == Sescape || code == Scharquote)))
93da5fff 1253 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
8489eb67 1254 break;
6a140a74 1255 DEC_BOTH (from, from_byte);
93da5fff 1256 ch1 = ch0;
8489eb67
RS
1257 }
1258 count++;
1259 }
1260
1261 immediate_quit = 0;
1262
1263 return from;
1264}
1265
1266DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
1267 "Move point forward ARG words (backward if ARG is negative).\n\
1268Normally returns t.\n\
2c6ea900 1269If an edge of the buffer or a field boundary is reached, point is left there\n\
e6d8341f
GM
1270and the function returns nil. Field boundaries are not noticed if\n\
1271`inhibit-field-text-motion' is non-nil.")
8489eb67
RS
1272 (count)
1273 Lisp_Object count;
1274{
2c6ea900 1275 int orig_val, val;
8489eb67
RS
1276 CHECK_NUMBER (count, 0);
1277
2c6ea900
RS
1278 val = orig_val = scan_words (PT, XINT (count));
1279 if (! orig_val)
1280 val = XINT (count) > 0 ? ZV : BEGV;
5878ee6f 1281
b8855607 1282 /* Avoid jumping out of an input field. */
64aa4eb1 1283 val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
070fe99c 1284 Qt, Qnil, Qnil));
64aa4eb1 1285
8489eb67 1286 SET_PT (val);
e6d8341f 1287 return val == orig_val ? Qt : Qnil;
8489eb67
RS
1288}
1289\f
195d1361
RS
1290Lisp_Object skip_chars ();
1291
1292DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1293 "Move point forward, stopping before a char not in STRING, or at pos LIM.\n\
1294STRING is like the inside of a `[...]' in a regular expression\n\
74cf0beb
RS
1295except that `]' is never special and `\\' quotes `^', `-' or `\\'\n\
1296 (but not as the end of a range; quoting is never needed there).\n\
195d1361
RS
1297Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
1298With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
1299Returns the distance traveled, either zero or positive.")
1300 (string, lim)
1301 Lisp_Object string, lim;
1302{
1303 return skip_chars (1, 0, string, lim);
1304}
1305
1306DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1307 "Move point backward, stopping after a char not in STRING, or at pos LIM.\n\
1308See `skip-chars-forward' for details.\n\
1309Returns the distance traveled, either zero or negative.")
1310 (string, lim)
1311 Lisp_Object string, lim;
1312{
1313 return skip_chars (0, 0, string, lim);
1314}
1315
1316DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1317 "Move point forward across chars in specified syntax classes.\n\
1318SYNTAX is a string of syntax code characters.\n\
1319Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
1320If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1321This function returns the distance traveled, either zero or positive.")
1322 (syntax, lim)
1323 Lisp_Object syntax, lim;
1324{
1325 return skip_chars (1, 1, syntax, lim);
1326}
1327
1328DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1329 "Move point backward across chars in specified syntax classes.\n\
1330SYNTAX is a string of syntax code characters.\n\
1331Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
1332If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
1333This function returns the distance traveled, either zero or negative.")
1334 (syntax, lim)
1335 Lisp_Object syntax, lim;
1336{
1337 return skip_chars (0, 1, syntax, lim);
1338}
1339
6a140a74 1340static Lisp_Object
195d1361
RS
1341skip_chars (forwardp, syntaxp, string, lim)
1342 int forwardp, syntaxp;
1343 Lisp_Object string, lim;
1344{
195d1361 1345 register unsigned int c;
7e68b0ea 1346 register int ch;
195d1361 1347 unsigned char fastmap[0400];
7e68b0ea
RS
1348 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters
1349 of which codes don't fit in FASTMAP. In that case, we set the
1350 first byte of multibyte form (i.e. base leading-code) in FASTMAP
1351 and set the actual ranges of characters in CHAR_RANGES. In the
1352 form "X-Y" of STRING, both X and Y must belong to the same
1353 character set because a range striding across character sets is
1354 meaningless. */
5df4982e 1355 int *char_ranges;
7e68b0ea 1356 int n_char_ranges = 0;
195d1361 1357 int negate = 0;
4101e6fe 1358 register int i, i_byte;
7e68b0ea 1359 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1674d9a2
RS
1360 int string_multibyte;
1361 int size_byte;
82d497fc
KH
1362 unsigned char *str;
1363 int len;
195d1361
RS
1364
1365 CHECK_STRING (string, 0);
5df4982e 1366 char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
1674d9a2 1367 string_multibyte = STRING_MULTIBYTE (string);
82d497fc 1368 str = XSTRING (string)->data;
1674d9a2 1369 size_byte = STRING_BYTES (XSTRING (string));
195d1361 1370
82d497fc
KH
1371 /* Adjust the multibyteness of the string to that of the buffer. */
1372 if (multibyte != string_multibyte)
1373 {
1374 int nbytes;
1375
1376 if (multibyte)
1377 nbytes = count_size_as_multibyte (XSTRING (string)->data,
1378 XSTRING (string)->size);
1379 else
1380 nbytes = XSTRING (string)->size;
1381 if (nbytes != size_byte)
1382 {
1383 str = (unsigned char *) alloca (nbytes);
e39091b6 1384 copy_text (XSTRING (string)->data, str, size_byte,
82d497fc
KH
1385 string_multibyte, multibyte);
1386 size_byte = nbytes;
1387 }
1388 }
1389
195d1361
RS
1390 if (NILP (lim))
1391 XSETINT (lim, forwardp ? ZV : BEGV);
1392 else
6a140a74 1393 CHECK_NUMBER_COERCE_MARKER (lim, 0);
195d1361
RS
1394
1395 /* In any case, don't allow scan outside bounds of buffer. */
195d1361
RS
1396 if (XINT (lim) > ZV)
1397 XSETFASTINT (lim, ZV);
1398 if (XINT (lim) < BEGV)
1399 XSETFASTINT (lim, BEGV);
1400
195d1361
RS
1401 bzero (fastmap, sizeof fastmap);
1402
82d497fc 1403 i_byte = 0;
4101e6fe 1404
13090112 1405 if (i_byte < size_byte
4101e6fe 1406 && XSTRING (string)->data[0] == '^')
195d1361 1407 {
82d497fc 1408 negate = 1; i_byte++;
195d1361
RS
1409 }
1410
1411 /* Find the characters specified and set their elements of fastmap.
1412 If syntaxp, each character counts as itself.
1413 Otherwise, handle backslashes and ranges specially. */
1414
13090112 1415 while (i_byte < size_byte)
195d1361 1416 {
82d497fc 1417 int c_leading_code = str[i_byte];
4101e6fe 1418
82d497fc
KH
1419 c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
1420 i_byte += len;
4101e6fe 1421
195d1361 1422 if (syntaxp)
4101e6fe 1423 fastmap[syntax_spec_code[c & 0377]] = 1;
195d1361
RS
1424 else
1425 {
1426 if (c == '\\')
1427 {
13090112 1428 if (i_byte == size_byte)
4101e6fe
RS
1429 break;
1430
82d497fc
KH
1431 c_leading_code = str[i_byte];
1432 c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1433 i_byte += len;
195d1361 1434 }
13090112 1435 if (i_byte < size_byte
82d497fc 1436 && str[i_byte] == '-')
195d1361 1437 {
06274af5 1438 unsigned int c2, c2_leading_code;
4101e6fe
RS
1439
1440 /* Skip over the dash. */
82d497fc 1441 i_byte++;
4101e6fe 1442
13090112 1443 if (i_byte == size_byte)
4101e6fe
RS
1444 break;
1445
1446 /* Get the end of the range. */
82d497fc
KH
1447 c2_leading_code = str[i_byte];
1448 c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
1449 i_byte += len;
7e68b0ea 1450
4101e6fe 1451 if (SINGLE_BYTE_CHAR_P (c))
06274af5
KH
1452 {
1453 if (! SINGLE_BYTE_CHAR_P (c2))
e39091b6
KH
1454 {
1455 /* Handle a range such as \177-\377 in multibyte
1456 mode. Split that into two ranges, the low
1457 one ending at 0237, and the high one starting
1458 at the smallest character in the charset of
1459 C2 and ending at C2. */
1460 int charset = CHAR_CHARSET (c2);
1461 int c1 = MAKE_CHAR (charset, 0, 0);
1462
1463 fastmap[c2_leading_code] = 1;
1464 char_ranges[n_char_ranges++] = c1;
1465 char_ranges[n_char_ranges++] = c2;
1466 c2 = 0237;
1467 }
06274af5
KH
1468 while (c <= c2)
1469 {
1470 fastmap[c] = 1;
1471 c++;
1472 }
1473 }
e39091b6 1474 else if (! SINGLE_BYTE_CHAR_P (c2))
195d1361 1475 {
06274af5 1476 if (c_leading_code != c2_leading_code)
688351f2 1477 error ("Invalid character range: %s",
06274af5 1478 XSTRING (string)->data);
4101e6fe
RS
1479 if (c <= c2)
1480 {
e39091b6 1481 fastmap[c_leading_code] = 1;
4101e6fe
RS
1482 char_ranges[n_char_ranges++] = c;
1483 char_ranges[n_char_ranges++] = c2;
1484 }
195d1361 1485 }
195d1361
RS
1486 }
1487 else
7e68b0ea 1488 {
e39091b6
KH
1489 if (SINGLE_BYTE_CHAR_P (c))
1490 fastmap[c] = 1;
1491 else
7e68b0ea 1492 {
e39091b6 1493 fastmap[c_leading_code] = 1;
4101e6fe
RS
1494 char_ranges[n_char_ranges++] = c;
1495 char_ranges[n_char_ranges++] = c;
7e68b0ea
RS
1496 }
1497 }
195d1361
RS
1498 }
1499 }
1500
7e68b0ea
RS
1501 /* If ^ was the first character, complement the fastmap. In
1502 addition, as all multibyte characters have possibility of
1503 matching, set all entries for base leading codes, which is
1504 harmless even if SYNTAXP is 1. */
195d1361
RS
1505
1506 if (negate)
1507 for (i = 0; i < sizeof fastmap; i++)
7e68b0ea
RS
1508 {
1509 if (!multibyte || !BASE_LEADING_CODE_P (i))
1510 fastmap[i] ^= 1;
1511 else
1512 fastmap[i] = 1;
1513 }
195d1361
RS
1514
1515 {
1516 int start_point = PT;
1517 int pos = PT;
6a140a74 1518 int pos_byte = PT_BYTE;
195d1361
RS
1519
1520 immediate_quit = 1;
1521 if (syntaxp)
1522 {
1523 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1524 if (forwardp)
1525 {
7e68b0ea 1526 if (multibyte)
195d1361 1527 {
ee0cdb48
KH
1528 if (pos < XINT (lim))
1529 while (fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
1530 {
1531 /* Since we already checked for multibyteness,
1532 avoid using INC_BOTH which checks again. */
1533 INC_POS (pos_byte);
1534 pos++;
1535 if (pos >= XINT (lim))
1536 break;
1537 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1538 }
7e68b0ea
RS
1539 }
1540 else
1541 {
1542 while (pos < XINT (lim)
1543 && fastmap[(int) SYNTAX (FETCH_BYTE (pos))])
1544 {
1545 pos++;
1546 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1547 }
195d1361
RS
1548 }
1549 }
1550 else
1551 {
7e68b0ea 1552 if (multibyte)
195d1361 1553 {
7e68b0ea
RS
1554 while (pos > XINT (lim))
1555 {
6a140a74 1556 int savepos = pos_byte;
ef316cf0
RS
1557 /* Since we already checked for multibyteness,
1558 avoid using DEC_BOTH which checks again. */
1559 pos--;
1560 DEC_POS (pos_byte);
4c8535eb 1561 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
6a140a74 1562 if (!fastmap[(int) SYNTAX (FETCH_CHAR (pos_byte))])
7e68b0ea 1563 {
6a140a74
RS
1564 pos++;
1565 pos_byte = savepos;
7e68b0ea
RS
1566 break;
1567 }
7e68b0ea
RS
1568 }
1569 }
1570 else
1571 {
ee0cdb48
KH
1572 if (pos > XINT (lim))
1573 while (fastmap[(int) SYNTAX (FETCH_BYTE (pos - 1))])
1574 {
1575 pos--;
1576 if (pos <= XINT (lim))
4c8535eb 1577 break;
ee0cdb48
KH
1578 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
1579 }
195d1361
RS
1580 }
1581 }
1582 }
1583 else
1584 {
1585 if (forwardp)
1586 {
7e68b0ea 1587 if (multibyte)
6a140a74 1588 while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
7e68b0ea 1589 {
e39091b6
KH
1590 /* If we are looking at a multibyte character, we
1591 must look up the character in the table
1592 CHAR_RANGES. If there's no data in the table,
1593 that character is not what we want to skip. */
1594 if (BASE_LEADING_CODE_P (c)
1595 && (c = FETCH_MULTIBYTE_CHAR (pos_byte),
1596 ! SINGLE_BYTE_CHAR_P (c)))
7e68b0ea 1597 {
e39091b6
KH
1598 /* The following code do the right thing even if
1599 n_char_ranges is zero (i.e. no data in
1600 CHAR_RANGES). */
7e68b0ea 1601 for (i = 0; i < n_char_ranges; i += 2)
e39091b6 1602 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
7e68b0ea
RS
1603 break;
1604 if (!(negate ^ (i < n_char_ranges)))
1605 break;
7e68b0ea 1606 }
e39091b6 1607 INC_BOTH (pos, pos_byte);
7e68b0ea
RS
1608 }
1609 else
1610 while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
1611 pos++;
195d1361
RS
1612 }
1613 else
1614 {
7e68b0ea
RS
1615 if (multibyte)
1616 while (pos > XINT (lim))
1617 {
e39091b6
KH
1618 int prev_pos_byte = pos_byte;
1619
1620 DEC_POS (prev_pos_byte);
1621 if (!fastmap[(c = FETCH_BYTE (prev_pos_byte))])
1622 break;
1623
1624 /* See the comment in the previous similar code. */
1625 if (BASE_LEADING_CODE_P (c)
1626 && (c = FETCH_MULTIBYTE_CHAR (prev_pos_byte),
1627 ! SINGLE_BYTE_CHAR_P (c)))
7e68b0ea 1628 {
e39091b6
KH
1629 for (i = 0; i < n_char_ranges; i += 2)
1630 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1631 break;
1632 if (!(negate ^ (i < n_char_ranges)))
1633 break;
7e68b0ea 1634 }
e39091b6
KH
1635 pos--;
1636 pos_byte = prev_pos_byte;
7e68b0ea
RS
1637 }
1638 else
1639 while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
1640 pos--;
195d1361
RS
1641 }
1642 }
7e68b0ea 1643
6a140a74
RS
1644#if 0 /* Not needed now that a position in mid-character
1645 cannot be specified in Lisp. */
7e68b0ea
RS
1646 if (multibyte
1647 /* INC_POS or DEC_POS might have moved POS over LIM. */
1648 && (forwardp ? (pos > XINT (lim)) : (pos < XINT (lim))))
1649 pos = XINT (lim);
6a140a74 1650#endif
7e68b0ea 1651
6a140a74
RS
1652 if (! multibyte)
1653 pos_byte = pos;
1654
1655 SET_PT_BOTH (pos, pos_byte);
195d1361
RS
1656 immediate_quit = 0;
1657
1658 return make_number (PT - start_point);
1659 }
1660}
1661\f
95ff8dfc
RS
1662/* Jump over a comment, assuming we are at the beginning of one.
1663 FROM is the current position.
1664 FROM_BYTE is the bytepos corresponding to FROM.
1665 Do not move past STOP (a charpos).
1666 The comment over which we have to jump is of style STYLE
1667 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1668 NESTING should be positive to indicate the nesting at the beginning
1669 for nested comments and should be zero or negative else.
1670 ST_COMMENT_STYLE cannot be nested.
1671 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1672 (or 0 If the search cannot start in the middle of a two-character).
1673
1674 If successful, return 1 and store the charpos of the comment's end
1675 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1676 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1677 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1678 (as defined for state.incomment) in *INCOMMENT_PTR.
1679
1680 The comment end is the last character of the comment rather than the
1681 character just after the comment.
1682
1683 Global syntax data is assumed to initially be valid for FROM and
1684 remains valid for forward search starting at the returned position. */
1685
1686static int
1687forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1688 charpos_ptr, bytepos_ptr, incomment_ptr)
1689 int from, from_byte, stop;
1690 int nesting, style, prev_syntax;
1691 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1692{
1693 register int c, c1;
1694 register enum syntaxcode code;
1695 register int syntax;
1696
1697 if (nesting <= 0) nesting = -1;
1698
1699 /* Enter the loop in the middle so that we find
1700 a 2-char comment ender if we start in the middle of it. */
1701 syntax = prev_syntax;
1702 if (syntax != 0) goto forw_incomment;
1703
1704 while (1)
1705 {
1706 if (from == stop)
1707 {
1708 *incomment_ptr = nesting;
1709 *charpos_ptr = from;
1710 *bytepos_ptr = from_byte;
1711 return 0;
1712 }
1713 c = FETCH_CHAR (from_byte);
1714 syntax = SYNTAX_WITH_FLAGS (c);
1715 code = syntax & 0xff;
1716 if (code == Sendcomment
1717 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1718 && --nesting <= 0)
1719 /* we have encountered a comment end of the same style
1720 as the comment sequence which began this comment
1721 section */
1722 break;
1723 if (code == Scomment_fence
1724 && style == ST_COMMENT_STYLE)
1725 /* we have encountered a comment end of the same style
1726 as the comment sequence which began this comment
1727 section. */
1728 break;
1729 if (nesting > 0
1730 && code == Scomment
1731 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1732 /* we have encountered a nested comment of the same style
1733 as the comment sequence which began this comment section */
1734 nesting++;
1735 INC_BOTH (from, from_byte);
1736 UPDATE_SYNTAX_TABLE_FORWARD (from);
1737
1738 forw_incomment:
1739 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1740 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1741 && (c1 = FETCH_CHAR (from_byte),
1742 SYNTAX_COMEND_SECOND (c1)))
4ffe723b
GM
1743 {
1744 if (--nesting <= 0)
1745 /* we have encountered a comment end of the same style
1746 as the comment sequence which began this comment
1747 section */
1748 break;
1749 else
1750 {
1751 INC_BOTH (from, from_byte);
1752 UPDATE_SYNTAX_TABLE_FORWARD (from);
1753 }
1754 }
95ff8dfc
RS
1755 if (nesting > 0
1756 && from < stop
1757 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1758 && (c1 = FETCH_CHAR (from_byte),
1759 SYNTAX_COMMENT_STYLE (c1) == style
1760 && SYNTAX_COMSTART_SECOND (c1)))
1761 /* we have encountered a nested comment of the same style
1762 as the comment sequence which began this comment
1763 section */
1764 {
1765 INC_BOTH (from, from_byte);
1766 UPDATE_SYNTAX_TABLE_FORWARD (from);
1767 nesting++;
1768 }
1769 }
1770 *charpos_ptr = from;
1771 *bytepos_ptr = from_byte;
1772 return 1;
1773}
1774
b3cfe0c8
RS
1775DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1776 "Move forward across up to N comments. If N is negative, move backward.\n\
b3cfe0c8 1777Stop scanning if we find something other than a comment or whitespace.\n\
c81a3712 1778Set point to where scanning stops.\n\
b3cfe0c8
RS
1779If N comments are found as expected, with nothing except whitespace\n\
1780between them, return t; otherwise return nil.")
1781 (count)
840f481c 1782 Lisp_Object count;
b3cfe0c8
RS
1783{
1784 register int from;
6a140a74 1785 int from_byte;
b3cfe0c8 1786 register int stop;
8ea151b2 1787 register int c, c1;
b3cfe0c8
RS
1788 register enum syntaxcode code;
1789 int comstyle = 0; /* style of comment encountered */
95ff8dfc 1790 int comnested = 0; /* whether the comment is nestable or not */
be720845 1791 int found;
840f481c 1792 int count1;
6a140a74 1793 int out_charpos, out_bytepos;
95ff8dfc 1794 int dummy;
840f481c
RS
1795
1796 CHECK_NUMBER (count, 0);
1797 count1 = XINT (count);
195d1361 1798 stop = count1 > 0 ? ZV : BEGV;
b3cfe0c8
RS
1799
1800 immediate_quit = 1;
1801 QUIT;
1802
1803 from = PT;
6a140a74 1804 from_byte = PT_BYTE;
b3cfe0c8 1805
195d1361 1806 SETUP_SYNTAX_TABLE (from, count1);
840f481c 1807 while (count1 > 0)
b3cfe0c8 1808 {
04882296 1809 do
b3cfe0c8 1810 {
f902a008
RS
1811 int comstart_first;
1812
04882296
KH
1813 if (from == stop)
1814 {
ef316cf0 1815 SET_PT_BOTH (from, from_byte);
b7e6e612 1816 immediate_quit = 0;
04882296
KH
1817 return Qnil;
1818 }
6a140a74 1819 c = FETCH_CHAR (from_byte);
b3cfe0c8 1820 code = SYNTAX (c);
f902a008 1821 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 1822 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 1823 comstyle = SYNTAX_COMMENT_STYLE (c);
6a140a74 1824 INC_BOTH (from, from_byte);
f902a008 1825 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 1826 if (from < stop && comstart_first
6a140a74 1827 && (c1 = FETCH_CHAR (from_byte),
8ea151b2 1828 SYNTAX_COMSTART_SECOND (c1)))
b3cfe0c8 1829 {
7fc8191e
RS
1830 /* We have encountered a comment start sequence and we
1831 are ignoring all text inside comments. We must record
b3cfe0c8
RS
1832 the comment style this sequence begins so that later,
1833 only a comment end of the same style actually ends
7fc8191e 1834 the comment section. */
b3cfe0c8 1835 code = Scomment;
8ea151b2 1836 comstyle = SYNTAX_COMMENT_STYLE (c1);
95ff8dfc 1837 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 1838 INC_BOTH (from, from_byte);
95ff8dfc 1839 UPDATE_SYNTAX_TABLE_FORWARD (from);
b3cfe0c8 1840 }
1aa963c8
SM
1841 /* FIXME: here we ignore 2-char endcomments while we don't
1842 when going backwards. */
04882296
KH
1843 }
1844 while (code == Swhitespace || code == Sendcomment);
6a140a74 1845
da71ab91
KH
1846 if (code == Scomment_fence)
1847 comstyle = ST_COMMENT_STYLE;
1848 else if (code != Scomment)
04882296
KH
1849 {
1850 immediate_quit = 0;
6a140a74 1851 DEC_BOTH (from, from_byte);
ef316cf0 1852 SET_PT_BOTH (from, from_byte);
04882296
KH
1853 return Qnil;
1854 }
1855 /* We're at the start of a comment. */
95ff8dfc
RS
1856 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1857 &out_charpos, &out_bytepos, &dummy);
1858 from = out_charpos; from_byte = out_bytepos;
1859 if (!found)
04882296 1860 {
95ff8dfc
RS
1861 immediate_quit = 0;
1862 SET_PT_BOTH (from, from_byte);
1863 return Qnil;
b3cfe0c8 1864 }
95ff8dfc
RS
1865 INC_BOTH (from, from_byte);
1866 UPDATE_SYNTAX_TABLE_FORWARD (from);
04882296 1867 /* We have skipped one comment. */
840f481c 1868 count1--;
b3cfe0c8
RS
1869 }
1870
840f481c 1871 while (count1 < 0)
b3cfe0c8 1872 {
b9145dbb 1873 while (1)
b3cfe0c8 1874 {
f902a008
RS
1875 int quoted, comstart_second;
1876
b9145dbb
RS
1877 if (from <= stop)
1878 {
6a140a74 1879 SET_PT_BOTH (BEGV, BEGV_BYTE);
b9145dbb
RS
1880 immediate_quit = 0;
1881 return Qnil;
1882 }
b3cfe0c8 1883
6a140a74 1884 DEC_BOTH (from, from_byte);
f902a008 1885 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
6a140a74 1886 quoted = char_quoted (from, from_byte);
b3cfe0c8 1887 if (quoted)
195d1361 1888 {
6a140a74
RS
1889 DEC_BOTH (from, from_byte);
1890 goto leave;
195d1361 1891 }
6a140a74 1892 c = FETCH_CHAR (from_byte);
b3cfe0c8
RS
1893 code = SYNTAX (c);
1894 comstyle = 0;
95ff8dfc 1895 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
1896 if (code == Sendcomment)
1897 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 1898 comstart_second = SYNTAX_COMSTART_SECOND (c);
b3cfe0c8 1899 if (from > stop && SYNTAX_COMEND_SECOND (c)
f902a008 1900 && prev_char_comend_first (from, from_byte)
89c6809a 1901 && !char_quoted (from - 1, dec_bytepos (from_byte)))
b3cfe0c8 1902 {
7fc8191e 1903 /* We must record the comment style encountered so that
b3cfe0c8 1904 later, we can match only the proper comment begin
7fc8191e 1905 sequence of the same style. */
6a140a74 1906 DEC_BOTH (from, from_byte);
f902a008
RS
1907 code = Sendcomment;
1908 /* Calling char_quoted, above, set up global syntax position
1909 at the new value of FROM. */
95ff8dfc
RS
1910 c1 = FETCH_CHAR (from_byte);
1911 comstyle = SYNTAX_COMMENT_STYLE (c1);
1912 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
b3cfe0c8 1913 }
f902a008
RS
1914 if (from > stop && comstart_second
1915 && prev_char_comstart_first (from, from_byte)
89c6809a 1916 && !char_quoted (from - 1, dec_bytepos (from_byte)))
ae03af34 1917 {
ae03af34 1918 code = Scomment;
6a140a74 1919 DEC_BOTH (from, from_byte);
ae03af34 1920 }
b3cfe0c8 1921
195d1361
RS
1922 if (code == Scomment_fence)
1923 {
1924 /* Skip until first preceding unquoted comment_fence. */
6a140a74 1925 int found = 0, ini = from, ini_byte = from_byte;
195d1361 1926
6a140a74 1927 while (1)
195d1361 1928 {
6a140a74
RS
1929 DEC_BOTH (from, from_byte);
1930 if (from == stop)
1931 break;
195d1361 1932 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74
RS
1933 c = FETCH_CHAR (from_byte);
1934 if (SYNTAX (c) == Scomment_fence
1935 && !char_quoted (from, from_byte))
195d1361
RS
1936 {
1937 found = 1;
1938 break;
1939 }
1940 }
1941 if (found == 0)
1942 {
1943 from = ini; /* Set point to ini + 1. */
6a140a74 1944 from_byte = ini_byte;
195d1361
RS
1945 goto leave;
1946 }
1947 }
1948 else if (code == Sendcomment)
b3cfe0c8 1949 {
95ff8dfc 1950 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 1951 &out_charpos, &out_bytepos);
1aa963c8
SM
1952 if (found == -1)
1953 {
2b927d02
SM
1954#if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
1955 /* Failure: we should go back to the end of this
1aa963c8
SM
1956 not-quite-endcomment. */
1957 if (SYNTAX(c) != code)
1958 /* It was a two-char Sendcomment. */
1959 INC_BOTH (from, from_byte);
1960 goto leave;
2b927d02 1961#endif
1aa963c8 1962 }
2b927d02
SM
1963 else
1964 /* We have skipped one comment. */
1965 from = out_charpos, from_byte = out_bytepos;
1966 break;
b3cfe0c8 1967 }
195d1361 1968 else if (code != Swhitespace && code != Scomment)
b3cfe0c8 1969 {
195d1361 1970 leave:
b3cfe0c8 1971 immediate_quit = 0;
6a140a74 1972 INC_BOTH (from, from_byte);
ef316cf0 1973 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
1974 return Qnil;
1975 }
1976 }
1977
840f481c 1978 count1++;
b3cfe0c8
RS
1979 }
1980
ef316cf0 1981 SET_PT_BOTH (from, from_byte);
b3cfe0c8
RS
1982 immediate_quit = 0;
1983 return Qt;
1984}
1985\f
bd25db08
KH
1986/* Return syntax code of character C if C is a single byte character
1987 or `multibyte_symbol_p' is zero. Otherwise, retrun Ssymbol. */
1988
1989#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
1990 ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
1991 ? SYNTAX (c) : Ssymbol)
1992
6a140a74 1993static Lisp_Object
8489eb67
RS
1994scan_lists (from, count, depth, sexpflag)
1995 register int from;
1996 int count, depth, sexpflag;
1997{
1998 Lisp_Object val;
195d1361 1999 register int stop = count > 0 ? ZV : BEGV;
93da5fff
KH
2000 register int c, c1;
2001 int stringterm;
8489eb67
RS
2002 int quoted;
2003 int mathexit = 0;
93da5fff 2004 register enum syntaxcode code, temp_code;
195d1361 2005 int min_depth = depth; /* Err out if depth gets less than this. */
e5d4f4dc 2006 int comstyle = 0; /* style of comment encountered */
95ff8dfc 2007 int comnested = 0; /* whether the comment is nestable or not */
93da5fff 2008 int temp_pos;
7bf5e9e4 2009 int last_good = from;
195d1361 2010 int found;
c3907a7e 2011 int from_byte;
6a140a74 2012 int out_bytepos, out_charpos;
95ff8dfc 2013 int temp, dummy;
bd25db08 2014 int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
8489eb67
RS
2015
2016 if (depth > 0) min_depth = 0;
2017
c3907a7e
RS
2018 if (from > ZV) from = ZV;
2019 if (from < BEGV) from = BEGV;
2020
2021 from_byte = CHAR_TO_BYTE (from);
2022
8489eb67
RS
2023 immediate_quit = 1;
2024 QUIT;
2025
195d1361 2026 SETUP_SYNTAX_TABLE (from, count);
8489eb67
RS
2027 while (count > 0)
2028 {
8489eb67
RS
2029 while (from < stop)
2030 {
f902a008 2031 int comstart_first, prefix;
195d1361 2032 UPDATE_SYNTAX_TABLE_FORWARD (from);
6a140a74 2033 c = FETCH_CHAR (from_byte);
bd25db08 2034 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
f902a008 2035 comstart_first = SYNTAX_COMSTART_FIRST (c);
95ff8dfc 2036 comnested = SYNTAX_COMMENT_NESTED (c);
e6365855 2037 comstyle = SYNTAX_COMMENT_STYLE (c);
f902a008 2038 prefix = SYNTAX_PREFIX (c);
7bf5e9e4
RS
2039 if (depth == min_depth)
2040 last_good = from;
6a140a74 2041 INC_BOTH (from, from_byte);
195d1361 2042 UPDATE_SYNTAX_TABLE_FORWARD (from);
f902a008 2043 if (from < stop && comstart_first
6a140a74 2044 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))
8489eb67 2045 && parse_sexp_ignore_comments)
e5d4f4dc
RS
2046 {
2047 /* we have encountered a comment start sequence and we
195d1361 2048 are ignoring all text inside comments. We must record
e5d4f4dc
RS
2049 the comment style this sequence begins so that later,
2050 only a comment end of the same style actually ends
2051 the comment section */
2052 code = Scomment;
95ff8dfc
RS
2053 c1 = FETCH_CHAR (from_byte);
2054 comstyle = SYNTAX_COMMENT_STYLE (c1);
2055 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
6a140a74 2056 INC_BOTH (from, from_byte);
f902a008 2057 UPDATE_SYNTAX_TABLE_FORWARD (from);
e5d4f4dc
RS
2058 }
2059
f902a008 2060 if (prefix)
8489eb67
RS
2061 continue;
2062
0220c518 2063 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2064 {
2065 case Sescape:
2066 case Scharquote:
2067 if (from == stop) goto lose;
6a140a74 2068 INC_BOTH (from, from_byte);
8489eb67
RS
2069 /* treat following character as a word constituent */
2070 case Sword:
2071 case Ssymbol:
2072 if (depth || !sexpflag) break;
195d1361 2073 /* This word counts as a sexp; return at end of it. */
8489eb67
RS
2074 while (from < stop)
2075 {
195d1361 2076 UPDATE_SYNTAX_TABLE_FORWARD (from);
7c7ca3d2
RS
2077
2078 /* Some compilers can't handle this inside the switch. */
bd25db08
KH
2079 c = FETCH_CHAR (from_byte);
2080 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2081 switch (temp)
8489eb67
RS
2082 {
2083 case Scharquote:
2084 case Sescape:
6a140a74 2085 INC_BOTH (from, from_byte);
8489eb67
RS
2086 if (from == stop) goto lose;
2087 break;
2088 case Sword:
2089 case Ssymbol:
2090 case Squote:
2091 break;
2092 default:
2093 goto done;
2094 }
6a140a74 2095 INC_BOTH (from, from_byte);
8489eb67
RS
2096 }
2097 goto done;
2098
195d1361 2099 case Scomment_fence:
95ff8dfc
RS
2100 comstyle = ST_COMMENT_STYLE;
2101 /* FALLTHROUGH */
2102 case Scomment:
8489eb67 2103 if (!parse_sexp_ignore_comments) break;
95ff8dfc
RS
2104 UPDATE_SYNTAX_TABLE_FORWARD (from);
2105 found = forw_comment (from, from_byte, stop,
2106 comnested, comstyle, 0,
2107 &out_charpos, &out_bytepos, &dummy);
2108 from = out_charpos, from_byte = out_bytepos;
2109 if (!found)
8489eb67 2110 {
95ff8dfc
RS
2111 if (depth == 0)
2112 goto done;
2113 goto lose;
8489eb67 2114 }
95ff8dfc
RS
2115 INC_BOTH (from, from_byte);
2116 UPDATE_SYNTAX_TABLE_FORWARD (from);
8489eb67
RS
2117 break;
2118
2119 case Smath:
2120 if (!sexpflag)
2121 break;
6a140a74
RS
2122 if (from != stop && c == FETCH_CHAR (from_byte))
2123 {
2124 INC_BOTH (from, from_byte);
2125 }
8489eb67
RS
2126 if (mathexit)
2127 {
2128 mathexit = 0;
2129 goto close1;
2130 }
2131 mathexit = 1;
2132
2133 case Sopen:
2134 if (!++depth) goto done;
2135 break;
2136
2137 case Sclose:
2138 close1:
2139 if (!--depth) goto done;
2140 if (depth < min_depth)
7bf5e9e4
RS
2141 Fsignal (Qscan_error,
2142 Fcons (build_string ("Containing expression ends prematurely"),
2143 Fcons (make_number (last_good),
2144 Fcons (make_number (from), Qnil))));
8489eb67
RS
2145 break;
2146
2147 case Sstring:
195d1361 2148 case Sstring_fence:
ef316cf0 2149 temp_pos = dec_bytepos (from_byte);
93da5fff 2150 stringterm = FETCH_CHAR (temp_pos);
8489eb67
RS
2151 while (1)
2152 {
2153 if (from >= stop) goto lose;
195d1361 2154 UPDATE_SYNTAX_TABLE_FORWARD (from);
bd25db08 2155 c = FETCH_CHAR (from_byte);
195d1361 2156 if (code == Sstring
bd25db08
KH
2157 ? c == stringterm
2158 : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
195d1361 2159 break;
7c7ca3d2
RS
2160
2161 /* Some compilers can't handle this inside the switch. */
bd25db08 2162 temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7c7ca3d2 2163 switch (temp)
8489eb67
RS
2164 {
2165 case Scharquote:
2166 case Sescape:
6a140a74 2167 INC_BOTH (from, from_byte);
8489eb67 2168 }
6a140a74 2169 INC_BOTH (from, from_byte);
8489eb67 2170 }
6a140a74 2171 INC_BOTH (from, from_byte);
8489eb67
RS
2172 if (!depth && sexpflag) goto done;
2173 break;
2174 }
2175 }
2176
2177 /* Reached end of buffer. Error if within object, return nil if between */
2178 if (depth) goto lose;
2179
2180 immediate_quit = 0;
2181 return Qnil;
2182
2183 /* End of object reached */
2184 done:
2185 count--;
2186 }
2187
2188 while (count < 0)
2189 {
8489eb67
RS
2190 while (from > stop)
2191 {
6a140a74 2192 DEC_BOTH (from, from_byte);
195d1361 2193 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2194 c = FETCH_CHAR (from_byte);
bd25db08 2195 code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
7bf5e9e4
RS
2196 if (depth == min_depth)
2197 last_good = from;
7fc8191e 2198 comstyle = 0;
95ff8dfc 2199 comnested = SYNTAX_COMMENT_NESTED (c);
7fc8191e
RS
2200 if (code == Sendcomment)
2201 comstyle = SYNTAX_COMMENT_STYLE (c);
8489eb67 2202 if (from > stop && SYNTAX_COMEND_SECOND (c)
1674d9a2 2203 && prev_char_comend_first (from, from_byte)
8489eb67 2204 && parse_sexp_ignore_comments)
e5d4f4dc 2205 {
f902a008 2206 /* We must record the comment style encountered so that
e5d4f4dc 2207 later, we can match only the proper comment begin
f902a008 2208 sequence of the same style. */
6a140a74 2209 DEC_BOTH (from, from_byte);
f902a008
RS
2210 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2211 code = Sendcomment;
95ff8dfc
RS
2212 c1 = FETCH_CHAR (from_byte);
2213 comstyle = SYNTAX_COMMENT_STYLE (c1);
2214 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
e5d4f4dc
RS
2215 }
2216
9828a477 2217 /* Quoting turns anything except a comment-ender
e6365855 2218 into a word character. Note that this cannot be true
f902a008 2219 if we decremented FROM in the if-statement above. */
9828a477
KH
2220 if (code != Sendcomment && char_quoted (from, from_byte))
2221 code = Sword;
2222 else if (SYNTAX_PREFIX (c))
8489eb67
RS
2223 continue;
2224
9828a477 2225 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2226 {
2227 case Sword:
2228 case Ssymbol:
9828a477
KH
2229 case Sescape:
2230 case Scharquote:
8489eb67 2231 if (depth || !sexpflag) break;
195d1361
RS
2232 /* This word counts as a sexp; count object finished
2233 after passing it. */
8489eb67
RS
2234 while (from > stop)
2235 {
6a140a74 2236 temp_pos = from_byte;
ef316cf0
RS
2237 if (! NILP (current_buffer->enable_multibyte_characters))
2238 DEC_POS (temp_pos);
2239 else
2240 temp_pos--;
6a140a74 2241 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
9828a477 2242 c1 = FETCH_CHAR (temp_pos);
bd25db08 2243 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
9828a477
KH
2244 /* Don't allow comment-end to be quoted. */
2245 if (temp_code == Sendcomment)
2246 goto done2;
6a140a74 2247 quoted = char_quoted (from - 1, temp_pos);
8489eb67 2248 if (quoted)
93da5fff 2249 {
6a140a74 2250 DEC_BOTH (from, from_byte);
ef316cf0 2251 temp_pos = dec_bytepos (temp_pos);
6a140a74 2252 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff
KH
2253 }
2254 c1 = FETCH_CHAR (temp_pos);
bd25db08 2255 temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
93da5fff
KH
2256 if (! (quoted || temp_code == Sword
2257 || temp_code == Ssymbol
2258 || temp_code == Squote))
8489eb67 2259 goto done2;
6a140a74 2260 DEC_BOTH (from, from_byte);
8489eb67
RS
2261 }
2262 goto done2;
2263
2264 case Smath:
2265 if (!sexpflag)
2266 break;
ef316cf0 2267 temp_pos = dec_bytepos (from_byte);
6a140a74 2268 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
93da5fff 2269 if (from != stop && c == FETCH_CHAR (temp_pos))
6a140a74 2270 DEC_BOTH (from, from_byte);
8489eb67
RS
2271 if (mathexit)
2272 {
2273 mathexit = 0;
2274 goto open2;
2275 }
2276 mathexit = 1;
2277
2278 case Sclose:
2279 if (!++depth) goto done2;
2280 break;
2281
2282 case Sopen:
2283 open2:
2284 if (!--depth) goto done2;
2285 if (depth < min_depth)
7bf5e9e4
RS
2286 Fsignal (Qscan_error,
2287 Fcons (build_string ("Containing expression ends prematurely"),
2288 Fcons (make_number (last_good),
2289 Fcons (make_number (from), Qnil))));
8489eb67
RS
2290 break;
2291
2292 case Sendcomment:
2293 if (!parse_sexp_ignore_comments)
2294 break;
95ff8dfc 2295 found = back_comment (from, from_byte, stop, comnested, comstyle,
6a140a74 2296 &out_charpos, &out_bytepos);
1aa963c8
SM
2297 /* FIXME: if found == -1, then it really wasn't a comment-end.
2298 For single-char Sendcomment, we can't do much about it apart
2299 from skipping the char.
2300 For 2-char endcomments, we could try again, taking both
2301 chars as separate entities, but it's a lot of trouble
2302 for very little gain, so we don't bother either. -sm */
6a140a74
RS
2303 if (found != -1)
2304 from = out_charpos, from_byte = out_bytepos;
8489eb67
RS
2305 break;
2306
195d1361
RS
2307 case Scomment_fence:
2308 case Sstring_fence:
2309 while (1)
2310 {
6a140a74 2311 DEC_BOTH (from, from_byte);
195d1361
RS
2312 if (from == stop) goto lose;
2313 UPDATE_SYNTAX_TABLE_BACKWARD (from);
6a140a74 2314 if (!char_quoted (from, from_byte)
bd25db08
KH
2315 && (c = FETCH_CHAR (from_byte),
2316 SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
195d1361
RS
2317 break;
2318 }
2319 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2320 break;
2321
8489eb67 2322 case Sstring:
6a140a74 2323 stringterm = FETCH_CHAR (from_byte);
8489eb67
RS
2324 while (1)
2325 {
2326 if (from == stop) goto lose;
6a140a74 2327 temp_pos = from_byte;
ef316cf0
RS
2328 if (! NILP (current_buffer->enable_multibyte_characters))
2329 DEC_POS (temp_pos);
2330 else
2331 temp_pos--;
6a140a74
RS
2332 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2333 if (!char_quoted (from - 1, temp_pos)
93da5fff 2334 && stringterm == FETCH_CHAR (temp_pos))
8489eb67 2335 break;
6a140a74 2336 DEC_BOTH (from, from_byte);
8489eb67 2337 }
6a140a74 2338 DEC_BOTH (from, from_byte);
8489eb67
RS
2339 if (!depth && sexpflag) goto done2;
2340 break;
2341 }
2342 }
2343
2344 /* Reached start of buffer. Error if within object, return nil if between */
2345 if (depth) goto lose;
2346
2347 immediate_quit = 0;
2348 return Qnil;
2349
2350 done2:
2351 count++;
2352 }
2353
2354
2355 immediate_quit = 0;
1e142fb7 2356 XSETFASTINT (val, from);
8489eb67
RS
2357 return val;
2358
2359 lose:
7bf5e9e4
RS
2360 Fsignal (Qscan_error,
2361 Fcons (build_string ("Unbalanced parentheses"),
2362 Fcons (make_number (last_good),
2363 Fcons (make_number (from), Qnil))));
2364
8489eb67
RS
2365 /* NOTREACHED */
2366}
2367
8489eb67
RS
2368DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2369 "Scan from character number FROM by COUNT lists.\n\
2370Returns the character number of the position thus found.\n\
2371\n\
2372If DEPTH is nonzero, paren depth begins counting from that value,\n\
2373only places where the depth in parentheses becomes zero\n\
2374are candidates for stopping; COUNT such places are counted.\n\
2375Thus, a positive value for DEPTH means go out levels.\n\
2376\n\
2377Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2378\n\
2379If the beginning or end of (the accessible part of) the buffer is reached\n\
2380and the depth is wrong, an error is signaled.\n\
2381If the depth is right but the count is not used up, nil is returned.")
2382 (from, count, depth)
2383 Lisp_Object from, count, depth;
2384{
2385 CHECK_NUMBER (from, 0);
2386 CHECK_NUMBER (count, 1);
2387 CHECK_NUMBER (depth, 2);
2388
2389 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2390}
2391
2392DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2393 "Scan from character number FROM by COUNT balanced expressions.\n\
2394If COUNT is negative, scan backwards.\n\
2395Returns the character number of the position thus found.\n\
2396\n\
2397Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
2398\n\
2399If the beginning or end of (the accessible part of) the buffer is reached\n\
2400in the middle of a parenthetical grouping, an error is signaled.\n\
2401If the beginning or end is reached between groupings\n\
2402but before count is used up, nil is returned.")
2403 (from, count)
2404 Lisp_Object from, count;
2405{
2406 CHECK_NUMBER (from, 0);
2407 CHECK_NUMBER (count, 1);
2408
2409 return scan_lists (XINT (from), XINT (count), 0, 1);
2410}
2411
2412DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2413 0, 0, 0,
2414 "Move point backward over any number of chars with prefix syntax.\n\
2415This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
2416 ()
2417{
2418 int beg = BEGV;
6a140a74
RS
2419 int opoint = PT;
2420 int opoint_byte = PT_BYTE;
6ec8bbd2 2421 int pos = PT;
6a140a74 2422 int pos_byte = PT_BYTE;
93da5fff 2423 int c;
93da5fff 2424
f902a008 2425 if (pos <= beg)
195d1361 2426 {
f902a008
RS
2427 SET_PT_BOTH (opoint, opoint_byte);
2428
2429 return Qnil;
195d1361 2430 }
8489eb67 2431
f902a008
RS
2432 SETUP_SYNTAX_TABLE (pos, -1);
2433
6a140a74
RS
2434 DEC_BOTH (pos, pos_byte);
2435
1fd3172d 2436 while (!char_quoted (pos, pos_byte)
195d1361 2437 /* Previous statement updates syntax table. */
6a140a74 2438 && ((c = FETCH_CHAR (pos_byte), SYNTAX (c) == Squote)
93da5fff
KH
2439 || SYNTAX_PREFIX (c)))
2440 {
1fd3172d
RS
2441 opoint = pos;
2442 opoint_byte = pos_byte;
2443
2444 if (pos + 1 > beg)
2445 DEC_BOTH (pos, pos_byte);
93da5fff 2446 }
8489eb67 2447
6a140a74 2448 SET_PT_BOTH (opoint, opoint_byte);
8489eb67
RS
2449
2450 return Qnil;
2451}
2452\f
6a140a74 2453/* Parse forward from FROM / FROM_BYTE to END,
e5d4f4dc
RS
2454 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2455 and return a description of the state of the parse at END.
c81a3712 2456 If STOPBEFORE is nonzero, stop at the start of an atom.
644ea4df
RS
2457 If COMMENTSTOP is 1, stop at the start of a comment.
2458 If COMMENTSTOP is -1, stop at the start or end of a comment,
2459 after the beginning of a string, or after the end of a string. */
8489eb67 2460
340f92b5 2461static void
6a140a74 2462scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
c81a3712 2463 stopbefore, oldstate, commentstop)
e5d4f4dc 2464 struct lisp_parse_state *stateptr;
8489eb67
RS
2465 register int from;
2466 int end, targetdepth, stopbefore;
2467 Lisp_Object oldstate;
c81a3712 2468 int commentstop;
8489eb67
RS
2469{
2470 struct lisp_parse_state state;
2471
2472 register enum syntaxcode code;
95ff8dfc
RS
2473 int c1;
2474 int comnested;
8489eb67
RS
2475 struct level { int last, prev; };
2476 struct level levelstart[100];
2477 register struct level *curlevel = levelstart;
2478 struct level *endlevel = levelstart + 100;
8489eb67
RS
2479 register int depth; /* Paren depth of current scanning location.
2480 level - levelstart equals this except
2481 when the depth becomes negative. */
2482 int mindepth; /* Lowest DEPTH value seen. */
2483 int start_quoted = 0; /* Nonzero means starting after a char quote */
2484 Lisp_Object tem;
93da5fff 2485 int prev_from; /* Keep one character before FROM. */
6a140a74 2486 int prev_from_byte;
1fd3172d 2487 int prev_from_syntax;
195d1361
RS
2488 int boundary_stop = commentstop == -1;
2489 int nofence;
95ff8dfc
RS
2490 int found;
2491 int out_bytepos, out_charpos;
7c7ca3d2 2492 int temp;
93da5fff
KH
2493
2494 prev_from = from;
6a140a74
RS
2495 prev_from_byte = from_byte;
2496 if (from != BEGV)
2497 DEC_BOTH (prev_from, prev_from_byte);
93da5fff
KH
2498
2499 /* Use this macro instead of `from++'. */
6a140a74
RS
2500#define INC_FROM \
2501do { prev_from = from; \
2502 prev_from_byte = from_byte; \
1fd3172d
RS
2503 prev_from_syntax \
2504 = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte)); \
ef316cf0 2505 INC_BOTH (from, from_byte); \
1fd3172d 2506 UPDATE_SYNTAX_TABLE_FORWARD (from); \
6a140a74 2507 } while (0)
8489eb67
RS
2508
2509 immediate_quit = 1;
2510 QUIT;
2511
265a9e55 2512 if (NILP (oldstate))
8489eb67
RS
2513 {
2514 depth = 0;
2515 state.instring = -1;
2516 state.incomment = 0;
195d1361
RS
2517 state.comstyle = 0; /* comment style a by default. */
2518 state.comstr_start = -1; /* no comment/string seen. */
8489eb67
RS
2519 }
2520 else
2521 {
2522 tem = Fcar (oldstate);
265a9e55 2523 if (!NILP (tem))
8489eb67
RS
2524 depth = XINT (tem);
2525 else
2526 depth = 0;
2527
2528 oldstate = Fcdr (oldstate);
2529 oldstate = Fcdr (oldstate);
2530 oldstate = Fcdr (oldstate);
2531 tem = Fcar (oldstate);
195d1361 2532 /* Check whether we are inside string_fence-style string: */
e76f1c44
GM
2533 state.instring = (!NILP (tem)
2534 ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2535 : -1);
8489eb67
RS
2536
2537 oldstate = Fcdr (oldstate);
2538 tem = Fcar (oldstate);
e76f1c44
GM
2539 state.incomment = (!NILP (tem)
2540 ? (INTEGERP (tem) ? XINT (tem) : -1)
95ff8dfc 2541 : 0);
8489eb67
RS
2542
2543 oldstate = Fcdr (oldstate);
2544 tem = Fcar (oldstate);
265a9e55 2545 start_quoted = !NILP (tem);
e5d4f4dc 2546
95ff8dfc 2547 /* if the eighth element of the list is nil, we are in comment
195d1361
RS
2548 style a. If it is non-nil, we are in comment style b */
2549 oldstate = Fcdr (oldstate);
e5d4f4dc 2550 oldstate = Fcdr (oldstate);
195d1361 2551 tem = Fcar (oldstate);
e76f1c44
GM
2552 state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2553 ? ST_COMMENT_STYLE : 1);
195d1361 2554
e5d4f4dc 2555 oldstate = Fcdr (oldstate);
e5d4f4dc 2556 tem = Fcar (oldstate);
195d1361 2557 state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
1a102a58
RS
2558 oldstate = Fcdr (oldstate);
2559 tem = Fcar (oldstate);
2560 while (!NILP (tem)) /* >= second enclosing sexps. */
2561 {
2562 /* curlevel++->last ran into compiler bug on Apollo */
2563 curlevel->last = XINT (Fcar (tem));
2564 if (++curlevel == endlevel)
02010917 2565 curlevel--; /* error ("Nesting too deep for parser"); */
1a102a58
RS
2566 curlevel->prev = -1;
2567 curlevel->last = -1;
2568 tem = Fcdr (tem);
2569 }
8489eb67
RS
2570 }
2571 state.quoted = 0;
2572 mindepth = depth;
2573
2574 curlevel->prev = -1;
2575 curlevel->last = -1;
2576
326b283b
RS
2577 SETUP_SYNTAX_TABLE (prev_from, 1);
2578 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2579 UPDATE_SYNTAX_TABLE_FORWARD (from);
2580
195d1361 2581 /* Enter the loop at a place appropriate for initial state. */
8489eb67 2582
326b283b
RS
2583 if (state.incomment)
2584 goto startincomment;
8489eb67
RS
2585 if (state.instring >= 0)
2586 {
195d1361 2587 nofence = state.instring != ST_STRING_STYLE;
326b283b
RS
2588 if (start_quoted)
2589 goto startquotedinstring;
8489eb67
RS
2590 goto startinstring;
2591 }
326b283b
RS
2592 else if (start_quoted)
2593 goto startquoted;
1fd3172d 2594
ccf89641 2595#if 0 /* This seems to be redundant with the identical code above. */
1fd3172d
RS
2596 SETUP_SYNTAX_TABLE (prev_from, 1);
2597 prev_from_syntax = SYNTAX_WITH_FLAGS (FETCH_CHAR (prev_from_byte));
2598 UPDATE_SYNTAX_TABLE_FORWARD (from);
ccf89641 2599#endif
1fd3172d 2600
8489eb67
RS
2601 while (from < end)
2602 {
93da5fff 2603 INC_FROM;
1fd3172d 2604 code = prev_from_syntax & 0xff;
4c920633 2605
8f9dc2ed 2606 if (code == Scomment)
95ff8dfc 2607 {
d355bd8a 2608 state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
95ff8dfc
RS
2609 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2610 1 : -1);
2611 state.comstr_start = prev_from;
2612 }
4c920633 2613 else if (code == Scomment_fence)
e5d4f4dc
RS
2614 {
2615 /* Record the comment style we have entered so that only
2616 the comment-end sequence of the same style actually
2617 terminates the comment section. */
95ff8dfc
RS
2618 state.comstyle = ST_COMMENT_STYLE;
2619 state.incomment = -1;
195d1361 2620 state.comstr_start = prev_from;
e5d4f4dc 2621 code = Scomment;
e5d4f4dc 2622 }
4c920633 2623 else if (from < end)
1fd3172d 2624 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
95ff8dfc
RS
2625 if (c1 = FETCH_CHAR (from_byte),
2626 SYNTAX_COMSTART_SECOND (c1))
1fd3172d 2627 /* Duplicate code to avoid a complex if-expression
4c920633
RS
2628 which causes trouble for the SGI compiler. */
2629 {
2630 /* Record the comment style we have entered so that only
2631 the comment-end sequence of the same style actually
2632 terminates the comment section. */
95ff8dfc
RS
2633 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2634 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2635 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2636 state.incomment = comnested ? 1 : -1;
4c920633 2637 state.comstr_start = prev_from;
95ff8dfc 2638 INC_FROM;
4c920633
RS
2639 code = Scomment;
2640 }
e5d4f4dc 2641
1fd3172d 2642 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
8489eb67 2643 continue;
0220c518 2644 switch (SWITCH_ENUM_CAST (code))
8489eb67
RS
2645 {
2646 case Sescape:
2647 case Scharquote:
2648 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2649 curlevel->last = prev_from;
8489eb67
RS
2650 startquoted:
2651 if (from == end) goto endquoted;
93da5fff 2652 INC_FROM;
8489eb67
RS
2653 goto symstarted;
2654 /* treat following character as a word constituent */
2655 case Sword:
2656 case Ssymbol:
2657 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2658 curlevel->last = prev_from;
8489eb67
RS
2659 symstarted:
2660 while (from < end)
2661 {
7c7ca3d2
RS
2662 /* Some compilers can't handle this inside the switch. */
2663 temp = SYNTAX (FETCH_CHAR (from_byte));
2664 switch (temp)
8489eb67
RS
2665 {
2666 case Scharquote:
2667 case Sescape:
93da5fff 2668 INC_FROM;
8489eb67
RS
2669 if (from == end) goto endquoted;
2670 break;
2671 case Sword:
2672 case Ssymbol:
2673 case Squote:
2674 break;
2675 default:
2676 goto symdone;
2677 }
93da5fff 2678 INC_FROM;
8489eb67
RS
2679 }
2680 symdone:
2681 curlevel->prev = curlevel->last;
2682 break;
2683
2684 case Scomment:
195d1361 2685 if (commentstop || boundary_stop) goto done;
d355bd8a
SM
2686 startincomment:
2687 /* The (from == BEGV) test was to enter the loop in the middle so
95ff8dfc 2688 that we find a 2-char comment ender even if we start in the
e6365855 2689 middle of it. We don't want to do that if we're just at the
d355bd8a 2690 beginning of the comment (think of (*) ... (*)). */
95ff8dfc
RS
2691 found = forw_comment (from, from_byte, end,
2692 state.incomment, state.comstyle,
e6365855
SM
2693 (from == BEGV || from < state.comstr_start + 3)
2694 ? 0 : prev_from_syntax,
95ff8dfc
RS
2695 &out_charpos, &out_bytepos, &state.incomment);
2696 from = out_charpos; from_byte = out_bytepos;
2697 /* Beware! prev_from and friends are invalid now.
2698 Luckily, the `done' doesn't use them and the INC_FROM
2699 sets them to a sane value without looking at them. */
2700 if (!found) goto done;
d355bd8a 2701 INC_FROM;
8489eb67 2702 state.incomment = 0;
e5d4f4dc 2703 state.comstyle = 0; /* reset the comment style */
195d1361 2704 if (boundary_stop) goto done;
8489eb67
RS
2705 break;
2706
2707 case Sopen:
2708 if (stopbefore) goto stop; /* this arg means stop at sexp start */
2709 depth++;
2710 /* curlevel++->last ran into compiler bug on Apollo */
93da5fff 2711 curlevel->last = prev_from;
8489eb67 2712 if (++curlevel == endlevel)
02010917 2713 curlevel--; /* error ("Nesting too deep for parser"); */
8489eb67
RS
2714 curlevel->prev = -1;
2715 curlevel->last = -1;
30844415 2716 if (targetdepth == depth) goto done;
8489eb67
RS
2717 break;
2718
2719 case Sclose:
2720 depth--;
2721 if (depth < mindepth)
2722 mindepth = depth;
2723 if (curlevel != levelstart)
2724 curlevel--;
2725 curlevel->prev = curlevel->last;
30844415 2726 if (targetdepth == depth) goto done;
8489eb67
RS
2727 break;
2728
2729 case Sstring:
195d1361
RS
2730 case Sstring_fence:
2731 state.comstr_start = from - 1;
8489eb67 2732 if (stopbefore) goto stop; /* this arg means stop at sexp start */
93da5fff 2733 curlevel->last = prev_from;
195d1361 2734 state.instring = (code == Sstring
6a140a74 2735 ? (FETCH_CHAR (prev_from_byte))
195d1361
RS
2736 : ST_STRING_STYLE);
2737 if (boundary_stop) goto done;
8489eb67 2738 startinstring:
195d1361 2739 {
644ea4df 2740 nofence = state.instring != ST_STRING_STYLE;
195d1361 2741
644ea4df
RS
2742 while (1)
2743 {
2744 int c;
195d1361 2745
644ea4df 2746 if (from >= end) goto done;
6a140a74 2747 c = FETCH_CHAR (from_byte);
7c7ca3d2
RS
2748 /* Some compilers can't handle this inside the switch. */
2749 temp = SYNTAX (c);
ccf89641
KH
2750
2751 /* Check TEMP here so that if the char has
2752 a syntax-table property which says it is NOT
2753 a string character, it does not end the string. */
2754 if (nofence && c == state.instring && temp == Sstring)
2755 break;
2756
7c7ca3d2 2757 switch (temp)
644ea4df
RS
2758 {
2759 case Sstring_fence:
2760 if (!nofence) goto string_end;
2761 break;
2762 case Scharquote:
2763 case Sescape:
2764 INC_FROM;
2765 startquotedinstring:
2766 if (from >= end) goto endquoted;
195d1361 2767 }
644ea4df
RS
2768 INC_FROM;
2769 }
195d1361
RS
2770 }
2771 string_end:
8489eb67
RS
2772 state.instring = -1;
2773 curlevel->prev = curlevel->last;
93da5fff 2774 INC_FROM;
195d1361 2775 if (boundary_stop) goto done;
8489eb67
RS
2776 break;
2777
2778 case Smath:
2779 break;
2780 }
2781 }
2782 goto done;
2783
2784 stop: /* Here if stopping before start of sexp. */
93da5fff 2785 from = prev_from; /* We have just fetched the char that starts it; */
8489eb67
RS
2786 goto done; /* but return the position before it. */
2787
2788 endquoted:
2789 state.quoted = 1;
2790 done:
2791 state.depth = depth;
2792 state.mindepth = mindepth;
2793 state.thislevelstart = curlevel->prev;
2794 state.prevlevelstart
2795 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
2796 state.location = from;
1a102a58
RS
2797 state.levelstarts = Qnil;
2798 while (--curlevel >= levelstart)
2799 state.levelstarts = Fcons (make_number (curlevel->last),
2800 state.levelstarts);
8489eb67
RS
2801 immediate_quit = 0;
2802
e5d4f4dc 2803 *stateptr = state;
8489eb67
RS
2804}
2805
2806/* This comment supplies the doc string for parse-partial-sexp,
2807 for make-docfile to see. We cannot put this in the real DEFUN
2808 due to limits in the Unix cpp.
2809
c81a3712 2810DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
8489eb67
RS
2811 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
2812Parsing stops at TO or when certain criteria are met;\n\
2813 point is set to where parsing stops.\n\
2814If fifth arg STATE is omitted or nil,\n\
2815 parsing assumes that FROM is the beginning of a function.\n\
1a102a58 2816Value is a list of ten elements describing final state of parsing:\n\
af50f9e5
RS
2817 0. depth in parens.\n\
2818 1. character address of start of innermost containing list; nil if none.\n\
2819 2. character address of start of last complete sexp terminated.\n\
2820 3. non-nil if inside a string.\n\
195d1361 2821 (it is the character that will terminate the string,\n\
bff37d2a 2822 or t if the string should be terminated by a generic string delimiter.)\n\
95ff8dfc
RS
2823 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2824 else an integer (the current comment nesting).\n\
af50f9e5
RS
2825 5. t if following a quote character.\n\
2826 6. the minimum paren-depth encountered during this scan.\n\
943e731c 2827 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
bff37d2a
RS
2828 should be terminated by a generic comment delimiter.\n\
2829 8. character address of start of comment or string; nil if not in one.\n\
1a102a58 2830 9. Intermediate data for continuation of parsing (subject to change).\n\
8489eb67
RS
2831If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
2832in parentheses becomes equal to TARGETDEPTH.\n\
2833Fourth arg STOPBEFORE non-nil means stop when come to\n\
2834 any character that starts a sexp.\n\
bff37d2a
RS
2835Fifth arg STATE is a nine-element list like what this function returns.\n\
2836 It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
2837 and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
2838Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
943e731c
DL
2839 If it is symbol `syntax-table', stop after the start of a comment or a\n\
2840 string, or after end of a comment or a string.")
c81a3712 2841 (from, to, targetdepth, stopbefore, state, commentstop)
8489eb67
RS
2842*/
2843
c81a3712 2844DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
8489eb67 2845 0 /* See immediately above */)
c81a3712
RS
2846 (from, to, targetdepth, stopbefore, oldstate, commentstop)
2847 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
8489eb67
RS
2848{
2849 struct lisp_parse_state state;
2850 int target;
2851
265a9e55 2852 if (!NILP (targetdepth))
8489eb67
RS
2853 {
2854 CHECK_NUMBER (targetdepth, 3);
2855 target = XINT (targetdepth);
2856 }
2857 else
2858 target = -100000; /* We won't reach this depth */
2859
2860 validate_region (&from, &to);
6a140a74
RS
2861 scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
2862 XINT (to),
c81a3712 2863 target, !NILP (stopbefore), oldstate,
195d1361
RS
2864 (NILP (commentstop)
2865 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
8489eb67
RS
2866
2867 SET_PT (state.location);
2868
2869 return Fcons (make_number (state.depth),
2870 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2871 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
195d1361
RS
2872 Fcons (state.instring >= 0
2873 ? (state.instring == ST_STRING_STYLE
2874 ? Qt : make_number (state.instring)) : Qnil,
95ff8dfc
RS
2875 Fcons (state.incomment < 0 ? Qt :
2876 (state.incomment == 0 ? Qnil :
2877 make_number (state.incomment)),
8489eb67 2878 Fcons (state.quoted ? Qt : Qnil,
bff37d2a
RS
2879 Fcons (make_number (state.mindepth),
2880 Fcons ((state.comstyle
2881 ? (state.comstyle == ST_COMMENT_STYLE
2882 ? Qsyntax_table : Qt) :
2883 Qnil),
0beaf54f
DL
2884 Fcons (((state.incomment
2885 || (state.instring >= 0))
bff37d2a
RS
2886 ? make_number (state.comstr_start)
2887 : Qnil),
1a102a58 2888 Fcons (state.levelstarts, Qnil))))))))));
8489eb67
RS
2889}
2890\f
dfcf069d 2891void
8489eb67
RS
2892init_syntax_once ()
2893{
78f9a1f7 2894 register int i, c;
8ea151b2 2895 Lisp_Object temp;
8489eb67 2896
5ebaddf5
RS
2897 /* This has to be done here, before we call Fmake_char_table. */
2898 Qsyntax_table = intern ("syntax-table");
2899 staticpro (&Qsyntax_table);
2900
2901 /* Intern this now in case it isn't already done.
2902 Setting this variable twice is harmless.
2903 But don't staticpro it here--that is done in alloc.c. */
2904 Qchar_table_extra_slots = intern ("char-table-extra-slots");
2905
93da5fff 2906 /* Create objects which can be shared among syntax tables. */
44d8a793 2907 Vsyntax_code_object = Fmake_vector (make_number (13), Qnil);
93da5fff
KH
2908 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
2909 XVECTOR (Vsyntax_code_object)->contents[i]
2910 = Fcons (make_number (i), Qnil);
2911
5ebaddf5
RS
2912 /* Now we are ready to set up this property, so we can
2913 create syntax tables. */
2914 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
2915
93da5fff 2916 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
8489eb67 2917
5ebaddf5 2918 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
8489eb67 2919
93da5fff 2920 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
8489eb67 2921 for (i = 'a'; i <= 'z'; i++)
8ea151b2 2922 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2923 for (i = 'A'; i <= 'Z'; i++)
8ea151b2 2924 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
8489eb67 2925 for (i = '0'; i <= '9'; i++)
8ea151b2
RS
2926 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
2927
2928 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
2929 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
2930
2931 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
2932 Fcons (make_number (Sopen), make_number (')')));
2933 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
2934 Fcons (make_number (Sclose), make_number ('(')));
2935 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
2936 Fcons (make_number (Sopen), make_number (']')));
2937 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
2938 Fcons (make_number (Sclose), make_number ('[')));
2939 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
2940 Fcons (make_number (Sopen), make_number ('}')));
2941 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
2942 Fcons (make_number (Sclose), make_number ('{')));
2943 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
2944 Fcons (make_number ((int) Sstring), Qnil));
2945 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
2946 Fcons (make_number ((int) Sescape), Qnil));
2947
93da5fff 2948 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
8489eb67 2949 for (i = 0; i < 10; i++)
78f9a1f7
KH
2950 {
2951 c = "_-+*/&|<>="[i];
2952 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2953 }
8489eb67 2954
93da5fff 2955 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
8489eb67 2956 for (i = 0; i < 12; i++)
78f9a1f7
KH
2957 {
2958 c = ".,;:?!#@~^'`"[i];
2959 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
2960 }
bd25db08
KH
2961
2962 /* All multibyte characters have syntax `word' by default. */
2963 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
2964 for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
2965 XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
8489eb67
RS
2966}
2967
dfcf069d 2968void
8489eb67
RS
2969syms_of_syntax ()
2970{
2971 Qsyntax_table_p = intern ("syntax-table-p");
2972 staticpro (&Qsyntax_table_p);
2973
93da5fff
KH
2974 staticpro (&Vsyntax_code_object);
2975
7bf5e9e4
RS
2976 Qscan_error = intern ("scan-error");
2977 staticpro (&Qscan_error);
2978 Fput (Qscan_error, Qerror_conditions,
f3be100f 2979 Fcons (Qscan_error, Fcons (Qerror, Qnil)));
7bf5e9e4
RS
2980 Fput (Qscan_error, Qerror_message,
2981 build_string ("Scan error"));
2982
8489eb67
RS
2983 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
2984 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
2985
195d1361
RS
2986 DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
2987 "Non-nil means `forward-sexp', etc., grant `syntax-table' property.\n\
2988The value of this property should be either a syntax table, or a cons\n\
2989of the form (SYNTAXCODE . MATCHCHAR), SYNTAXCODE being the numeric\n\
2990syntax code, MATCHCHAR being nil or the character to match (which is\n\
2991relevant only for open/close type.");
2992
8489eb67
RS
2993 words_include_escapes = 0;
2994 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
2995 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
2996
bd25db08
KH
2997 DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
2998 "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
2999 multibyte_syntax_as_symbol = 0;
3000
f4ed767f
GM
3001 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3002 &open_paren_in_column_0_is_defun_start,
3003 "Non-nil means an open paren in column 0 denotes the start of a defun.");
3004 open_paren_in_column_0_is_defun_start = 1;
3005
8489eb67
RS
3006 defsubr (&Ssyntax_table_p);
3007 defsubr (&Ssyntax_table);
3008 defsubr (&Sstandard_syntax_table);
3009 defsubr (&Scopy_syntax_table);
3010 defsubr (&Sset_syntax_table);
3011 defsubr (&Schar_syntax);
beefa22e 3012 defsubr (&Smatching_paren);
8489eb67
RS
3013 defsubr (&Smodify_syntax_entry);
3014 defsubr (&Sdescribe_syntax);
3015
3016 defsubr (&Sforward_word);
3017
195d1361
RS
3018 defsubr (&Sskip_chars_forward);
3019 defsubr (&Sskip_chars_backward);
3020 defsubr (&Sskip_syntax_forward);
3021 defsubr (&Sskip_syntax_backward);
3022
b3cfe0c8 3023 defsubr (&Sforward_comment);
8489eb67
RS
3024 defsubr (&Sscan_lists);
3025 defsubr (&Sscan_sexps);
3026 defsubr (&Sbackward_prefix_chars);
3027 defsubr (&Sparse_partial_sexp);
3028}