(init_syntax_once): Add trick to avoid compiler warning
[bpt/emacs.git] / src / syntax.c
1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include <ctype.h>
24 #include "lisp.h"
25 #include "commands.h"
26 #include "buffer.h"
27 #include "charset.h"
28 #include "syntax.h"
29 #include "category.h"
30
31 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
32
33 static void scan_sexps_forward ();
34 static int char_quoted ();
35
36 int words_include_escapes;
37
38 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
39 if not compiled with GCC. No need to mark it, since it is used
40 only very temporarily. */
41 Lisp_Object syntax_temp;
42
43 /* This is the internal form of the parse state used in parse-partial-sexp. */
44
45 struct lisp_parse_state
46 {
47 int depth; /* Depth at end of parsing */
48 int instring; /* -1 if not within string, else desired terminator. */
49 int incomment; /* Nonzero if within a comment at end of parsing */
50 int comstyle; /* comment style a=0, or b=1 */
51 int quoted; /* Nonzero if just after an escape char at end of parsing */
52 int thislevelstart; /* Char number of most recent start-of-expression at current level */
53 int prevlevelstart; /* Char number of start of containing expression */
54 int location; /* Char number at which parsing stopped. */
55 int mindepth; /* Minimum depth seen while scanning. */
56 int comstart; /* Position just after last comment starter. */
57 };
58 \f
59 /* These variables are a cache for finding the start of a defun.
60 find_start_pos is the place for which the defun start was found.
61 find_start_value is the defun start position found for it.
62 find_start_buffer is the buffer it was found in.
63 find_start_begv is the BEGV value when it was found.
64 find_start_modiff is the value of MODIFF when it was found. */
65
66 static int find_start_pos;
67 static int find_start_value;
68 static struct buffer *find_start_buffer;
69 static int find_start_begv;
70 static int find_start_modiff;
71
72 /* Find a defun-start that is the last one before POS (or nearly the last).
73 We record what we find, so that another call in the same area
74 can return the same value right away. */
75
76 static int
77 find_defun_start (pos)
78 int pos;
79 {
80 int tem;
81 int shortage;
82
83 /* Use previous finding, if it's valid and applies to this inquiry. */
84 if (current_buffer == find_start_buffer
85 /* Reuse the defun-start even if POS is a little farther on.
86 POS might be in the next defun, but that's ok.
87 Our value may not be the best possible, but will still be usable. */
88 && pos <= find_start_pos + 1000
89 && pos >= find_start_value
90 && BEGV == find_start_begv
91 && MODIFF == find_start_modiff)
92 return find_start_value;
93
94 /* Back up to start of line. */
95 tem = scan_buffer ('\n', pos, BEGV, -1, &shortage, 1);
96
97 while (tem > BEGV)
98 {
99 /* Open-paren at start of line means we found our defun-start. */
100 if (SYNTAX (FETCH_CHAR (tem)) == Sopen)
101 break;
102 /* Move to beg of previous line. */
103 tem = scan_buffer ('\n', tem, BEGV, -2, &shortage, 1);
104 }
105
106 /* Record what we found, for the next try. */
107 find_start_value = tem;
108 find_start_buffer = current_buffer;
109 find_start_modiff = MODIFF;
110 find_start_begv = BEGV;
111 find_start_pos = pos;
112
113 return find_start_value;
114 }
115 \f
116 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
117 "Return t if OBJECT is a syntax table.\n\
118 Currently, any char-table counts as a syntax table.")
119 (object)
120 Lisp_Object object;
121 {
122 if (CHAR_TABLE_P (object)
123 && XCHAR_TABLE (object)->purpose == Qsyntax_table)
124 return Qt;
125 return Qnil;
126 }
127
128 static void
129 check_syntax_table (obj)
130 Lisp_Object obj;
131 {
132 if (!(CHAR_TABLE_P (obj)
133 && XCHAR_TABLE (obj)->purpose == Qsyntax_table))
134 wrong_type_argument (Qsyntax_table_p, obj);
135 }
136
137 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
138 "Return the current syntax table.\n\
139 This is the one specified by the current buffer.")
140 ()
141 {
142 return current_buffer->syntax_table;
143 }
144
145 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
146 Sstandard_syntax_table, 0, 0, 0,
147 "Return the standard syntax table.\n\
148 This is the one used for new buffers.")
149 ()
150 {
151 return Vstandard_syntax_table;
152 }
153
154 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
155 "Construct a new syntax table and return it.\n\
156 It is a copy of the TABLE, which defaults to the standard syntax table.")
157 (table)
158 Lisp_Object table;
159 {
160 Lisp_Object copy;
161
162 if (!NILP (table))
163 check_syntax_table (table);
164 else
165 table = Vstandard_syntax_table;
166
167 copy = Fcopy_sequence (table);
168
169 /* Only the standard syntax table should have a default element.
170 Other syntax tables should inherit from parents instead. */
171 XCHAR_TABLE (copy)->defalt = Qnil;
172
173 /* Copied syntax tables should all have parents.
174 If we copied one with no parent, such as the standard syntax table,
175 use the standard syntax table as the copy's parent. */
176 if (NILP (XCHAR_TABLE (copy)->parent))
177 Fset_char_table_parent (copy, Vstandard_syntax_table);
178 return copy;
179 }
180
181 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
182 "Select a new syntax table for the current buffer.\n\
183 One argument, a syntax table.")
184 (table)
185 Lisp_Object table;
186 {
187 check_syntax_table (table);
188 current_buffer->syntax_table = table;
189 /* Indicate that this buffer now has a specified syntax table. */
190 current_buffer->local_var_flags
191 |= XFASTINT (buffer_local_flags.syntax_table);
192 return table;
193 }
194 \f
195 /* Convert a letter which signifies a syntax code
196 into the code it signifies.
197 This is used by modify-syntax-entry, and other things. */
198
199 unsigned char syntax_spec_code[0400] =
200 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
203 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
204 (char) Swhitespace, 0377, (char) Sstring, 0377,
205 (char) Smath, 0377, 0377, (char) Squote,
206 (char) Sopen, (char) Sclose, 0377, 0377,
207 0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
208 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
209 0377, 0377, 0377, 0377,
210 (char) Scomment, 0377, (char) Sendcomment, 0377,
211 (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
212 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
213 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
214 0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
215 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
216 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
217 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
218 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
219 };
220
221 /* Indexed by syntax code, give the letter that describes it. */
222
223 char syntax_code_spec[14] =
224 {
225 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
226 };
227
228 /* Indexed by syntax code, give the object (cons of syntax code and
229 nil) to be stored in syntax table. Since these objects can be
230 shared among syntax tables, we generate them in advance. By
231 sharing objects, the function `describe-syntax' can give a more
232 compact listing. */
233 static Lisp_Object Vsyntax_code_object;
234
235 \f
236 /* Look up the value for CHARACTER in syntax table TABLE's parent
237 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
238 for CHARACTER. It's actually used only when not compiled with GCC. */
239
240 Lisp_Object
241 syntax_parent_lookup (table, character)
242 Lisp_Object table;
243 int character;
244 {
245 Lisp_Object value;
246
247 while (1)
248 {
249 table = XCHAR_TABLE (table)->parent;
250 if (NILP (table))
251 return Qnil;
252
253 value = XCHAR_TABLE (table)->contents[character];
254 if (!NILP (value))
255 return value;
256 }
257 }
258
259 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
260 "Return the syntax code of CHARACTER, described by a character.\n\
261 For example, if CHARACTER is a word constituent,\n\
262 the character `w' is returned.\n\
263 The characters that correspond to various syntax codes\n\
264 are listed in the documentation of `modify-syntax-entry'.")
265 (character)
266 Lisp_Object character;
267 {
268 int char_int;
269 CHECK_NUMBER (character, 0);
270 char_int = XINT (character);
271 return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
272 }
273
274 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
275 "Return the matching parenthesis of CHARACTER, or nil if none.")
276 (character)
277 Lisp_Object character;
278 {
279 int char_int, code;
280 CHECK_NUMBER (character, 0);
281 char_int = XINT (character);
282 code = SYNTAX (char_int);
283 if (code == Sopen || code == Sclose)
284 return make_number (SYNTAX_MATCH (char_int));
285 return Qnil;
286 }
287
288 /* This comment supplies the doc string for modify-syntax-entry,
289 for make-docfile to see. We cannot put this in the real DEFUN
290 due to limits in the Unix cpp.
291
292 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
293 "Set syntax for character CHAR according to string S.\n\
294 The syntax is changed only for table TABLE, which defaults to\n\
295 the current buffer's syntax table.\n\
296 The first character of S should be one of the following:\n\
297 Space or - whitespace syntax. w word constituent.\n\
298 _ symbol constituent. . punctuation.\n\
299 ( open-parenthesis. ) close-parenthesis.\n\
300 \" string quote. \\ escape.\n\
301 $ paired delimiter. ' expression quote or prefix operator.\n\
302 < comment starter. > comment ender.\n\
303 / character-quote. @ inherit from `standard-syntax-table'.\n\
304 \n\
305 Only single-character comment start and end sequences are represented thus.\n\
306 Two-character sequences are represented as described below.\n\
307 The second character of S is the matching parenthesis,\n\
308 used only if the first character is `(' or `)'.\n\
309 Any additional characters are flags.\n\
310 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
311 1 means CHAR is the start of a two-char comment start sequence.\n\
312 2 means CHAR is the second character of such a sequence.\n\
313 3 means CHAR is the start of a two-char comment end sequence.\n\
314 4 means CHAR is the second character of such a sequence.\n\
315 \n\
316 There can be up to two orthogonal comment sequences. This is to support\n\
317 language modes such as C++. By default, all comment sequences are of style\n\
318 a, but you can set the comment sequence style to b (on the second character\n\
319 of a comment-start, or the first character of a comment-end sequence) using\n\
320 this flag:\n\
321 b means CHAR is part of comment sequence b.\n\
322 \n\
323 p means CHAR is a prefix character for `backward-prefix-chars';\n\
324 such characters are treated as whitespace when they occur\n\
325 between expressions.")
326 (char, s, table)
327 */
328
329 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
330 /* I really don't know why this is interactive
331 help-form should at least be made useful whilst reading the second arg
332 */
333 "cSet syntax for character: \nsSet syntax for %s to: ",
334 0 /* See immediately above */)
335 (c, newentry, syntax_table)
336 Lisp_Object c, newentry, syntax_table;
337 {
338 register unsigned char *p;
339 register enum syntaxcode code;
340 int val;
341 Lisp_Object match;
342
343 CHECK_NUMBER (c, 0);
344 CHECK_STRING (newentry, 1);
345
346 if (NILP (syntax_table))
347 syntax_table = current_buffer->syntax_table;
348 else
349 check_syntax_table (syntax_table);
350
351 p = XSTRING (newentry)->data;
352 code = (enum syntaxcode) syntax_spec_code[*p++];
353 if (((int) code & 0377) == 0377)
354 error ("invalid syntax description letter: %c", c);
355
356 if (code == Sinherit)
357 {
358 SET_RAW_SYNTAX_ENTRY (syntax_table, c, Qnil);
359 return Qnil;
360 }
361
362 if (*p)
363 {
364 int len;
365 int character = STRING_CHAR_AND_LENGTH (p, XSTRING (newentry)->size - 1,
366 len);
367 XSETINT (match, character);
368 if (XFASTINT (match) == ' ')
369 match = Qnil;
370 p += len;
371 }
372 else
373 match = Qnil;
374
375 val = (int) code;
376 while (*p)
377 switch (*p++)
378 {
379 case '1':
380 val |= 1 << 16;
381 break;
382
383 case '2':
384 val |= 1 << 17;
385 break;
386
387 case '3':
388 val |= 1 << 18;
389 break;
390
391 case '4':
392 val |= 1 << 19;
393 break;
394
395 case 'p':
396 val |= 1 << 20;
397 break;
398
399 case 'b':
400 val |= 1 << 21;
401 break;
402 }
403
404 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
405 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
406 else
407 /* Since we can't use a shared object, let's make a new one. */
408 newentry = Fcons (make_number (val), match);
409
410 SET_RAW_SYNTAX_ENTRY (syntax_table, c, newentry);
411
412 return Qnil;
413 }
414 \f
415 /* Dump syntax table to buffer in human-readable format */
416
417 static void
418 describe_syntax (value)
419 Lisp_Object value;
420 {
421 register enum syntaxcode code;
422 char desc, match, start1, start2, end1, end2, prefix, comstyle;
423 char str[2];
424 Lisp_Object first, match_lisp;
425
426 Findent_to (make_number (16), make_number (1));
427
428 if (NILP (value))
429 {
430 insert_string ("default\n");
431 return;
432 }
433
434 if (CHAR_TABLE_P (value))
435 {
436 insert_string ("deeper char-table ...\n");
437 return;
438 }
439
440 if (!CONSP (value))
441 {
442 insert_string ("invalid\n");
443 return;
444 }
445
446 first = XCONS (value)->car;
447 match_lisp = XCONS (value)->cdr;
448
449 if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
450 {
451 insert_string ("invalid\n");
452 return;
453 }
454
455 code = (enum syntaxcode) (first & 0377);
456 start1 = (XINT (first) >> 16) & 1;
457 start2 = (XINT (first) >> 17) & 1;
458 end1 = (XINT (first) >> 18) & 1;
459 end2 = (XINT (first) >> 19) & 1;
460 prefix = (XINT (first) >> 20) & 1;
461 comstyle = (XINT (first) >> 21) & 1;
462
463 if ((int) code < 0 || (int) code >= (int) Smax)
464 {
465 insert_string ("invalid");
466 return;
467 }
468 desc = syntax_code_spec[(int) code];
469
470 str[0] = desc, str[1] = 0;
471 insert (str, 1);
472
473 if (NILP (match_lisp))
474 insert (" ", 1);
475 else
476 insert_char (XINT (match_lisp));
477
478 if (start1)
479 insert ("1", 1);
480 if (start2)
481 insert ("2", 1);
482
483 if (end1)
484 insert ("3", 1);
485 if (end2)
486 insert ("4", 1);
487
488 if (prefix)
489 insert ("p", 1);
490 if (comstyle)
491 insert ("b", 1);
492
493 insert_string ("\twhich means: ");
494
495 switch (SWITCH_ENUM_CAST (code))
496 {
497 case Swhitespace:
498 insert_string ("whitespace"); break;
499 case Spunct:
500 insert_string ("punctuation"); break;
501 case Sword:
502 insert_string ("word"); break;
503 case Ssymbol:
504 insert_string ("symbol"); break;
505 case Sopen:
506 insert_string ("open"); break;
507 case Sclose:
508 insert_string ("close"); break;
509 case Squote:
510 insert_string ("quote"); break;
511 case Sstring:
512 insert_string ("string"); break;
513 case Smath:
514 insert_string ("math"); break;
515 case Sescape:
516 insert_string ("escape"); break;
517 case Scharquote:
518 insert_string ("charquote"); break;
519 case Scomment:
520 insert_string ("comment"); break;
521 case Sendcomment:
522 insert_string ("endcomment"); break;
523 default:
524 insert_string ("invalid");
525 return;
526 }
527
528 if (!NILP (match_lisp))
529 {
530 insert_string (", matches ");
531 insert_char (XINT (match_lisp));
532 }
533
534 if (start1)
535 insert_string (",\n\t is the first character of a comment-start sequence");
536 if (start2)
537 insert_string (",\n\t is the second character of a comment-start sequence");
538
539 if (end1)
540 insert_string (",\n\t is the first character of a comment-end sequence");
541 if (end2)
542 insert_string (",\n\t is the second character of a comment-end sequence");
543 if (comstyle)
544 insert_string (" (comment style b)");
545
546 if (prefix)
547 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
548
549 insert_string ("\n");
550 }
551
552 static Lisp_Object
553 describe_syntax_1 (vector)
554 Lisp_Object vector;
555 {
556 struct buffer *old = current_buffer;
557 set_buffer_internal (XBUFFER (Vstandard_output));
558 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
559 while (! NILP (XCHAR_TABLE (vector)->parent))
560 {
561 vector = XCHAR_TABLE (vector)->parent;
562 insert_string ("\nThe parent syntax table is:");
563 describe_vector (vector, Qnil, describe_syntax, 0, Qnil, Qnil);
564 }
565
566 call0 (intern ("help-mode"));
567 set_buffer_internal (old);
568 return Qnil;
569 }
570
571 DEFUN ("describe-syntax", Fdescribe_syntax, Sdescribe_syntax, 0, 0, "",
572 "Describe the syntax specifications in the syntax table.\n\
573 The descriptions are inserted in a buffer, which is then displayed.")
574 ()
575 {
576 internal_with_output_to_temp_buffer
577 ("*Help*", describe_syntax_1, current_buffer->syntax_table);
578
579 return Qnil;
580 }
581 \f
582 /* Return the position across COUNT words from FROM.
583 If that many words cannot be found before the end of the buffer, return 0.
584 COUNT negative means scan backward and stop at word beginning. */
585
586 scan_words (from, count)
587 register int from, count;
588 {
589 register int beg = BEGV;
590 register int end = ZV;
591 register enum syntaxcode code;
592 int ch0, ch1;
593 int temp_pos;
594
595 immediate_quit = 1;
596 QUIT;
597
598 while (count > 0)
599 {
600 while (1)
601 {
602 if (from == end)
603 {
604 immediate_quit = 0;
605 return 0;
606 }
607 ch0 = FETCH_CHAR (from);
608 code = SYNTAX (ch0);
609 INC_POS (from);
610 if (words_include_escapes
611 && (code == Sescape || code == Scharquote))
612 break;
613 if (code == Sword)
614 break;
615 }
616 /* Now CH0 is a character which begins a word and FROM is the
617 position of the next character. */
618 while (1)
619 {
620 if (from == end) break;
621 ch1 = FETCH_CHAR (from);
622 code = SYNTAX (ch1);
623 if (!(words_include_escapes
624 && (code == Sescape || code == Scharquote)))
625 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
626 break;
627 INC_POS (from);
628 ch0 = ch1;
629 }
630 count--;
631 }
632 while (count < 0)
633 {
634 while (1)
635 {
636 if (from == beg)
637 {
638 immediate_quit = 0;
639 return 0;
640 }
641 DEC_POS (from);
642 ch1 = FETCH_CHAR (from);
643 code = SYNTAX (ch1);
644 if (words_include_escapes
645 && (code == Sescape || code == Scharquote))
646 break;
647 if (code == Sword)
648 break;
649 }
650 /* Now CH1 is a character which ends a word and FROM is the
651 position of it. */
652 while (1)
653 {
654 if (from == beg) break;
655 temp_pos = from;
656 DEC_POS (temp_pos);
657 ch0 = FETCH_CHAR (temp_pos);
658 code = SYNTAX (ch0);
659 if (!(words_include_escapes
660 && (code == Sescape || code == Scharquote)))
661 if (code != Sword || WORD_BOUNDARY_P (ch0, ch1))
662 break;
663 from = temp_pos;
664 ch1 = ch0;
665 }
666 count++;
667 }
668
669 immediate_quit = 0;
670
671 return from;
672 }
673
674 DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
675 "Move point forward ARG words (backward if ARG is negative).\n\
676 Normally returns t.\n\
677 If an edge of the buffer is reached, point is left there\n\
678 and nil is returned.")
679 (count)
680 Lisp_Object count;
681 {
682 int val;
683 CHECK_NUMBER (count, 0);
684
685 if (!(val = scan_words (PT, XINT (count))))
686 {
687 SET_PT (XINT (count) > 0 ? ZV : BEGV);
688 return Qnil;
689 }
690 SET_PT (val);
691 return Qt;
692 }
693 \f
694 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
695 "Move forward across up to N comments. If N is negative, move backward.\n\
696 Stop scanning if we find something other than a comment or whitespace.\n\
697 Set point to where scanning stops.\n\
698 If N comments are found as expected, with nothing except whitespace\n\
699 between them, return t; otherwise return nil.")
700 (count)
701 Lisp_Object count;
702 {
703 register int from;
704 register int stop;
705 register int c, c1;
706 register enum syntaxcode code;
707 int comstyle = 0; /* style of comment encountered */
708 int found;
709 int count1;
710 int temp_pos;
711
712 CHECK_NUMBER (count, 0);
713 count1 = XINT (count);
714
715 immediate_quit = 1;
716 QUIT;
717
718 from = PT;
719
720 while (count1 > 0)
721 {
722 stop = ZV;
723 do
724 {
725 if (from == stop)
726 {
727 SET_PT (from);
728 immediate_quit = 0;
729 return Qnil;
730 }
731 c = FETCH_CHAR (from);
732 code = SYNTAX (c);
733 INC_POS (from);
734 comstyle = 0;
735 if (from < stop && SYNTAX_COMSTART_FIRST (c)
736 && (c1 = FETCH_CHAR (from),
737 SYNTAX_COMSTART_SECOND (c1)))
738 {
739 /* We have encountered a comment start sequence and we
740 are ignoring all text inside comments. We must record
741 the comment style this sequence begins so that later,
742 only a comment end of the same style actually ends
743 the comment section. */
744 code = Scomment;
745 comstyle = SYNTAX_COMMENT_STYLE (c1);
746 INC_POS (from);
747 }
748 }
749 while (code == Swhitespace || code == Sendcomment);
750 if (code != Scomment)
751 {
752 immediate_quit = 0;
753 DEC_POS (from);
754 SET_PT (from);
755 return Qnil;
756 }
757 /* We're at the start of a comment. */
758 while (1)
759 {
760 if (from == stop)
761 {
762 immediate_quit = 0;
763 SET_PT (from);
764 return Qnil;
765 }
766 c = FETCH_CHAR (from);
767 INC_POS (from);
768 if (SYNTAX (c) == Sendcomment
769 && SYNTAX_COMMENT_STYLE (c) == comstyle)
770 /* we have encountered a comment end of the same style
771 as the comment sequence which began this comment
772 section */
773 break;
774 if (from < stop && SYNTAX_COMEND_FIRST (c)
775 && (c1 = FETCH_CHAR (from),
776 SYNTAX_COMEND_SECOND (c1))
777 && SYNTAX_COMMENT_STYLE (c) == comstyle)
778 /* we have encountered a comment end of the same style
779 as the comment sequence which began this comment
780 section */
781 { INC_POS (from); break; }
782 }
783 /* We have skipped one comment. */
784 count1--;
785 }
786
787 while (count1 < 0)
788 {
789 stop = BEGV;
790 while (from > stop)
791 {
792 int quoted;
793
794 DEC_POS (from);
795 quoted = char_quoted (from);
796 if (quoted)
797 DEC_POS (from);
798 c = FETCH_CHAR (from);
799 code = SYNTAX (c);
800 comstyle = 0;
801 if (code == Sendcomment)
802 comstyle = SYNTAX_COMMENT_STYLE (c);
803 temp_pos = from;
804 DEC_POS (temp_pos);
805 if (from > stop && SYNTAX_COMEND_SECOND (c)
806 && (c1 = FETCH_CHAR (temp_pos),
807 SYNTAX_COMEND_FIRST (c1))
808 && !char_quoted (temp_pos))
809 {
810 /* We must record the comment style encountered so that
811 later, we can match only the proper comment begin
812 sequence of the same style. */
813 code = Sendcomment;
814 comstyle = SYNTAX_COMMENT_STYLE (c1);
815 from = temp_pos;
816 }
817
818 if (code == Sendcomment && !quoted)
819 {
820 #if 0
821 if (code != SYNTAX (c))
822 /* For a two-char comment ender, we can assume
823 it does end a comment. So scan back in a simple way. */
824 {
825 if (from != stop) DEC_POS (from);
826 while (1)
827 {
828 if ((c = FETCH_CHAR (from),
829 SYNTAX (c) == Scomment)
830 && SYNTAX_COMMENT_STYLE (c) == comstyle)
831 break;
832 if (from == stop)
833 {
834 immediate_quit = 0;
835 SET_PT (from);
836 return Qnil;
837 }
838 DEC_POS (from);
839 if (SYNTAX_COMSTART_SECOND (c)
840 && (c1 = FETCH_CHAR (from),
841 SYNTAX_COMSTART_FIRST (c1))
842 && SYNTAX_COMMENT_STYLE (c) == comstyle
843 && !char_quoted (from))
844 break;
845 }
846 break;
847 }
848 #endif /* 0 */
849
850 /* Look back, counting the parity of string-quotes,
851 and recording the comment-starters seen.
852 When we reach a safe place, assume that's not in a string;
853 then step the main scan to the earliest comment-starter seen
854 an even number of string quotes away from the safe place.
855
856 OFROM[I] is position of the earliest comment-starter seen
857 which is I+2X quotes from the comment-end.
858 PARITY is current parity of quotes from the comment end. */
859 {
860 int parity = 0;
861 char my_stringend = 0;
862 int string_lossage = 0;
863 int comment_end = from;
864 int comstart_pos = 0;
865 int comstart_parity = 0;
866 int scanstart = from;
867
868 DEC_POS (scanstart);
869 /* At beginning of range to scan, we're outside of strings;
870 that determines quote parity to the comment-end. */
871 while (from != stop)
872 {
873 /* Move back and examine a character. */
874 DEC_POS (from);
875
876 c = FETCH_CHAR (from);
877 code = SYNTAX (c);
878
879 /* If this char is the second of a 2-char comment sequence,
880 back up and give the pair the appropriate syntax. */
881 temp_pos = from;
882 DEC_POS (temp_pos);
883 if (from > stop && SYNTAX_COMEND_SECOND (c)
884 && (c1 = FETCH_CHAR (temp_pos),
885 SYNTAX_COMEND_FIRST (c1)))
886 {
887 code = Sendcomment;
888 from = temp_pos;
889 c = c1;
890 }
891
892 temp_pos = from;
893 INC_POS (temp_pos);
894 /* If this char starts a 2-char comment start sequence,
895 treat it like a 1-char comment starter. */
896 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
897 && (c1 = FETCH_CHAR (temp_pos),
898 SYNTAX_COMSTART_SECOND (c1))
899 && comstyle == SYNTAX_COMMENT_STYLE (c1))
900 code = Scomment;
901
902 /* Ignore escaped characters. */
903 if (char_quoted (from))
904 continue;
905
906 /* Track parity of quotes. */
907 if (code == Sstring)
908 {
909 parity ^= 1;
910 if (my_stringend == 0)
911 my_stringend = c;
912 /* If we have two kinds of string delimiters.
913 There's no way to grok this scanning backwards. */
914 else if (my_stringend != c)
915 string_lossage = 1;
916 }
917
918 /* Record comment-starters according to that
919 quote-parity to the comment-end. */
920 if (code == Scomment)
921 {
922 comstart_parity = parity;
923 comstart_pos = from;
924 }
925
926 /* If we find another earlier comment-ender,
927 any comment-starts earlier than that don't count
928 (because they go with the earlier comment-ender). */
929 if (code == Sendcomment
930 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
931 break;
932
933 /* Assume a defun-start point is outside of strings. */
934 if (code == Sopen
935 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
936 break;
937 }
938
939 if (comstart_pos == 0)
940 from = comment_end;
941 /* If the earliest comment starter
942 is followed by uniform paired string quotes or none,
943 we know it can't be inside a string
944 since if it were then the comment ender would be inside one.
945 So it does start a comment. Skip back to it. */
946 else if (comstart_parity == 0 && !string_lossage)
947 from = comstart_pos;
948 else
949 {
950 /* We had two kinds of string delimiters mixed up
951 together. Decode this going forwards.
952 Scan fwd from the previous comment ender
953 to the one in question; this records where we
954 last passed a comment starter. */
955 struct lisp_parse_state state;
956 scan_sexps_forward (&state, find_defun_start (comment_end),
957 comment_end - 1, -10000, 0, Qnil, 0);
958 if (state.incomment)
959 from = state.comstart;
960 else
961 /* We can't grok this as a comment; scan it normally. */
962 from = comment_end;
963 }
964 }
965 /* We have skipped one comment. */
966 break;
967 }
968 else if ((code != Swhitespace && code != Scomment) || quoted)
969 {
970 immediate_quit = 0;
971 INC_POS (from);
972 SET_PT (from);
973 return Qnil;
974 }
975 }
976
977 count1++;
978 }
979
980 SET_PT (from);
981 immediate_quit = 0;
982 return Qt;
983 }
984 \f
985 int parse_sexp_ignore_comments;
986
987 Lisp_Object
988 scan_lists (from, count, depth, sexpflag)
989 register int from;
990 int count, depth, sexpflag;
991 {
992 Lisp_Object val;
993 register int stop;
994 register int c, c1;
995 int stringterm;
996 int quoted;
997 int mathexit = 0;
998 register enum syntaxcode code, temp_code;
999 int min_depth = depth; /* Err out if depth gets less than this. */
1000 int comstyle = 0; /* style of comment encountered */
1001 int temp_pos;
1002 int last_good = from;
1003
1004 if (depth > 0) min_depth = 0;
1005
1006 immediate_quit = 1;
1007 QUIT;
1008
1009 while (count > 0)
1010 {
1011 stop = ZV;
1012 while (from < stop)
1013 {
1014 c = FETCH_CHAR (from);
1015 code = SYNTAX (c);
1016 if (depth == min_depth)
1017 last_good = from;
1018 INC_POS (from);
1019 if (from < stop && SYNTAX_COMSTART_FIRST (c)
1020 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from))
1021 && parse_sexp_ignore_comments)
1022 {
1023 /* we have encountered a comment start sequence and we
1024 are ignoring all text inside comments. we must record
1025 the comment style this sequence begins so that later,
1026 only a comment end of the same style actually ends
1027 the comment section */
1028 code = Scomment;
1029 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1030 INC_POS (from);
1031 }
1032
1033 if (SYNTAX_PREFIX (c))
1034 continue;
1035
1036 switch (SWITCH_ENUM_CAST (code))
1037 {
1038 case Sescape:
1039 case Scharquote:
1040 if (from == stop) goto lose;
1041 INC_POS (from);
1042 /* treat following character as a word constituent */
1043 case Sword:
1044 case Ssymbol:
1045 if (depth || !sexpflag) break;
1046 /* This word counts as a sexp; return at end of it. */
1047 while (from < stop)
1048 {
1049 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1050 {
1051 case Scharquote:
1052 case Sescape:
1053 INC_POS (from);
1054 if (from == stop) goto lose;
1055 break;
1056 case Sword:
1057 case Ssymbol:
1058 case Squote:
1059 break;
1060 default:
1061 goto done;
1062 }
1063 INC_POS (from);
1064 }
1065 goto done;
1066
1067 case Scomment:
1068 if (!parse_sexp_ignore_comments) break;
1069 while (1)
1070 {
1071 if (from == stop)
1072 {
1073 if (depth == 0)
1074 goto done;
1075 goto lose;
1076 }
1077 c = FETCH_CHAR (from);
1078 if (SYNTAX (c) == Sendcomment
1079 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1080 /* we have encountered a comment end of the same style
1081 as the comment sequence which began this comment
1082 section */
1083 break;
1084 INC_POS (from);
1085 if (from < stop && SYNTAX_COMEND_FIRST (c)
1086 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1087 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1088 /* we have encountered a comment end of the same style
1089 as the comment sequence which began this comment
1090 section */
1091 { INC_POS (from); break; }
1092 }
1093 break;
1094
1095 case Smath:
1096 if (!sexpflag)
1097 break;
1098 if (from != stop && c == FETCH_CHAR (from))
1099 INC_POS (from);
1100 if (mathexit)
1101 {
1102 mathexit = 0;
1103 goto close1;
1104 }
1105 mathexit = 1;
1106
1107 case Sopen:
1108 if (!++depth) goto done;
1109 break;
1110
1111 case Sclose:
1112 close1:
1113 if (!--depth) goto done;
1114 if (depth < min_depth)
1115 Fsignal (Qscan_error,
1116 Fcons (build_string ("Containing expression ends prematurely"),
1117 Fcons (make_number (last_good),
1118 Fcons (make_number (from), Qnil))));
1119 break;
1120
1121 case Sstring:
1122 temp_pos = from;
1123 DEC_POS (temp_pos);
1124 stringterm = FETCH_CHAR (temp_pos);
1125 while (1)
1126 {
1127 if (from >= stop) goto lose;
1128 if (FETCH_CHAR (from) == stringterm) break;
1129 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1130 {
1131 case Scharquote:
1132 case Sescape:
1133 INC_POS (from);
1134 }
1135 INC_POS (from);
1136 }
1137 INC_POS (from);
1138 if (!depth && sexpflag) goto done;
1139 break;
1140 }
1141 }
1142
1143 /* Reached end of buffer. Error if within object, return nil if between */
1144 if (depth) goto lose;
1145
1146 immediate_quit = 0;
1147 return Qnil;
1148
1149 /* End of object reached */
1150 done:
1151 count--;
1152 }
1153
1154 while (count < 0)
1155 {
1156 stop = BEGV;
1157 while (from > stop)
1158 {
1159 DEC_POS (from);
1160 if (quoted = char_quoted (from))
1161 DEC_POS (from);
1162 c = FETCH_CHAR (from);
1163 code = SYNTAX (c);
1164 if (depth == min_depth)
1165 last_good = from;
1166 comstyle = 0;
1167 if (code == Sendcomment)
1168 comstyle = SYNTAX_COMMENT_STYLE (c);
1169 temp_pos = from;
1170 DEC_POS (temp_pos);
1171 if (from > stop && SYNTAX_COMEND_SECOND (c)
1172 && (c1 = FETCH_CHAR (temp_pos), SYNTAX_COMEND_FIRST (c1))
1173 && !char_quoted (temp_pos)
1174 && parse_sexp_ignore_comments)
1175 {
1176 /* we must record the comment style encountered so that
1177 later, we can match only the proper comment begin
1178 sequence of the same style */
1179 code = Sendcomment;
1180 comstyle = SYNTAX_COMMENT_STYLE (c1);
1181 from = temp_pos;
1182 }
1183
1184 if (SYNTAX_PREFIX (c))
1185 continue;
1186
1187 switch (SWITCH_ENUM_CAST (quoted ? Sword : code))
1188 {
1189 case Sword:
1190 case Ssymbol:
1191 if (depth || !sexpflag) break;
1192 /* This word counts as a sexp; count object finished after passing it. */
1193 while (from > stop)
1194 {
1195 temp_pos = from;
1196 DEC_POS (temp_pos);
1197 quoted = char_quoted (temp_pos);
1198 if (quoted)
1199 {
1200 from = temp_pos;
1201 DEC_POS (temp_pos);
1202 }
1203 c1 = FETCH_CHAR (temp_pos);
1204 temp_code = SYNTAX (c1);
1205 if (! (quoted || temp_code == Sword
1206 || temp_code == Ssymbol
1207 || temp_code == Squote))
1208 goto done2;
1209 from = temp_pos;
1210 }
1211 goto done2;
1212
1213 case Smath:
1214 if (!sexpflag)
1215 break;
1216 temp_pos = from;
1217 DEC_POS (temp_pos);
1218 if (from != stop && c == FETCH_CHAR (temp_pos))
1219 from = temp_pos;
1220 if (mathexit)
1221 {
1222 mathexit = 0;
1223 goto open2;
1224 }
1225 mathexit = 1;
1226
1227 case Sclose:
1228 if (!++depth) goto done2;
1229 break;
1230
1231 case Sopen:
1232 open2:
1233 if (!--depth) goto done2;
1234 if (depth < min_depth)
1235 Fsignal (Qscan_error,
1236 Fcons (build_string ("Containing expression ends prematurely"),
1237 Fcons (make_number (last_good),
1238 Fcons (make_number (from), Qnil))));
1239 break;
1240
1241 case Sendcomment:
1242 if (!parse_sexp_ignore_comments)
1243 break;
1244 #if 0
1245 if (code != SYNTAX (c))
1246 /* For a two-char comment ender, we can assume
1247 it does end a comment. So scan back in a simple way. */
1248 {
1249 if (from != stop) DEC_POS (from);
1250 while (1)
1251 {
1252 if (SYNTAX (c = FETCH_CHAR (from)) == Scomment
1253 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1254 break;
1255 if (from == stop)
1256 {
1257 if (depth == 0)
1258 goto done2;
1259 goto lose;
1260 }
1261 DEC_POS (from);
1262 if (SYNTAX_COMSTART_SECOND (c)
1263 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from))
1264 && SYNTAX_COMMENT_STYLE (c) == comstyle
1265 && !char_quoted (from))
1266 break;
1267 }
1268 break;
1269 }
1270 #endif /* 0 */
1271
1272 /* Look back, counting the parity of string-quotes,
1273 and recording the comment-starters seen.
1274 When we reach a safe place, assume that's not in a string;
1275 then step the main scan to the earliest comment-starter seen
1276 an even number of string quotes away from the safe place.
1277
1278 OFROM[I] is position of the earliest comment-starter seen
1279 which is I+2X quotes from the comment-end.
1280 PARITY is current parity of quotes from the comment end. */
1281 {
1282 int parity = 0;
1283 char my_stringend = 0;
1284 int string_lossage = 0;
1285 int comment_end = from;
1286 int comstart_pos = 0;
1287 int comstart_parity = 0;
1288 int scanstart = from;
1289
1290 DEC_POS (scanstart);
1291
1292 /* At beginning of range to scan, we're outside of strings;
1293 that determines quote parity to the comment-end. */
1294 while (from != stop)
1295 {
1296 /* Move back and examine a character. */
1297 DEC_POS (from);
1298
1299 c = FETCH_CHAR (from);
1300 code = SYNTAX (c);
1301
1302 /* If this char is the second of a 2-char comment sequence,
1303 back up and give the pair the appropriate syntax. */
1304 temp_pos = from;
1305 DEC_POS (temp_pos);
1306 if (from > stop && SYNTAX_COMEND_SECOND (c)
1307 && (c1 = FETCH_CHAR (temp_pos),
1308 SYNTAX_COMEND_FIRST (c1)))
1309 {
1310 code = Sendcomment;
1311 from = temp_pos;
1312 c = c1;
1313 }
1314
1315 /* If this char starts a 2-char comment start sequence,
1316 treat it like a 1-char comment starter. */
1317 temp_pos = from;
1318 INC_POS (temp_pos);
1319 if (from < scanstart && SYNTAX_COMSTART_FIRST (c)
1320 && (c1 = FETCH_CHAR (temp_pos),
1321 SYNTAX_COMSTART_SECOND (c1))
1322 && comstyle == SYNTAX_COMMENT_STYLE (c1))
1323 code = Scomment;
1324
1325 /* Ignore escaped characters. */
1326 if (char_quoted (from))
1327 continue;
1328
1329 /* Track parity of quotes. */
1330 if (code == Sstring)
1331 {
1332 parity ^= 1;
1333 if (my_stringend == 0)
1334 my_stringend = c;
1335 /* If we have two kinds of string delimiters.
1336 There's no way to grok this scanning backwards. */
1337 else if (my_stringend != c)
1338 string_lossage = 1;
1339 }
1340
1341 /* Record comment-starters according to that
1342 quote-parity to the comment-end. */
1343 if (code == Scomment)
1344 {
1345 comstart_parity = parity;
1346 comstart_pos = from;
1347 }
1348
1349 /* If we find another earlier comment-ender,
1350 any comment-starts earlier than that don't count
1351 (because they go with the earlier comment-ender). */
1352 if (code == Sendcomment
1353 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from)) == comstyle)
1354 break;
1355
1356 /* Assume a defun-start point is outside of strings. */
1357 if (code == Sopen
1358 && (from == stop || FETCH_BYTE (from - 1) == '\n'))
1359 break;
1360 }
1361
1362 if (comstart_pos == 0)
1363 from = comment_end;
1364 /* If the earliest comment starter
1365 is followed by uniform paired string quotes or none,
1366 we know it can't be inside a string
1367 since if it were then the comment ender would be inside one.
1368 So it does start a comment. Skip back to it. */
1369 else if (comstart_parity == 0 && !string_lossage)
1370 from = comstart_pos;
1371 else
1372 {
1373 /* We had two kinds of string delimiters mixed up
1374 together. Decode this going forwards.
1375 Scan fwd from the previous comment ender
1376 to the one in question; this records where we
1377 last passed a comment starter. */
1378 struct lisp_parse_state state;
1379 scan_sexps_forward (&state, find_defun_start (comment_end),
1380 comment_end - 1, -10000, 0, Qnil, 0);
1381 if (state.incomment)
1382 from = state.comstart;
1383 else
1384 /* We can't grok this as a comment; scan it normally. */
1385 from = comment_end;
1386 }
1387 }
1388 break;
1389
1390 case Sstring:
1391 stringterm = FETCH_CHAR (from);
1392 while (1)
1393 {
1394 if (from == stop) goto lose;
1395 temp_pos = from;
1396 DEC_POS (temp_pos);
1397 if (!char_quoted (temp_pos)
1398 && stringterm == FETCH_CHAR (temp_pos))
1399 break;
1400 from = temp_pos;
1401 }
1402 DEC_POS (from);
1403 if (!depth && sexpflag) goto done2;
1404 break;
1405 }
1406 }
1407
1408 /* Reached start of buffer. Error if within object, return nil if between */
1409 if (depth) goto lose;
1410
1411 immediate_quit = 0;
1412 return Qnil;
1413
1414 done2:
1415 count++;
1416 }
1417
1418
1419 immediate_quit = 0;
1420 XSETFASTINT (val, from);
1421 return val;
1422
1423 lose:
1424 Fsignal (Qscan_error,
1425 Fcons (build_string ("Unbalanced parentheses"),
1426 Fcons (make_number (last_good),
1427 Fcons (make_number (from), Qnil))));
1428
1429 /* NOTREACHED */
1430 }
1431
1432 static int
1433 char_quoted (pos)
1434 register int pos;
1435 {
1436 register enum syntaxcode code;
1437 register int beg = BEGV;
1438 register int quoted = 0;
1439 int temp_pos = pos;
1440
1441 DEC_POS (temp_pos);
1442 while (pos > beg
1443 && ((code = SYNTAX (FETCH_CHAR (temp_pos))) == Scharquote
1444 || code == Sescape))
1445 {
1446 pos = temp_pos;
1447 quoted = !quoted;
1448 DEC_POS (temp_pos);
1449 }
1450 return quoted;
1451 }
1452
1453 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
1454 "Scan from character number FROM by COUNT lists.\n\
1455 Returns the character number of the position thus found.\n\
1456 \n\
1457 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1458 only places where the depth in parentheses becomes zero\n\
1459 are candidates for stopping; COUNT such places are counted.\n\
1460 Thus, a positive value for DEPTH means go out levels.\n\
1461 \n\
1462 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1463 \n\
1464 If the beginning or end of (the accessible part of) the buffer is reached\n\
1465 and the depth is wrong, an error is signaled.\n\
1466 If the depth is right but the count is not used up, nil is returned.")
1467 (from, count, depth)
1468 Lisp_Object from, count, depth;
1469 {
1470 CHECK_NUMBER (from, 0);
1471 CHECK_NUMBER (count, 1);
1472 CHECK_NUMBER (depth, 2);
1473
1474 return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
1475 }
1476
1477 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
1478 "Scan from character number FROM by COUNT balanced expressions.\n\
1479 If COUNT is negative, scan backwards.\n\
1480 Returns the character number of the position thus found.\n\
1481 \n\
1482 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1483 \n\
1484 If the beginning or end of (the accessible part of) the buffer is reached\n\
1485 in the middle of a parenthetical grouping, an error is signaled.\n\
1486 If the beginning or end is reached between groupings\n\
1487 but before count is used up, nil is returned.")
1488 (from, count)
1489 Lisp_Object from, count;
1490 {
1491 CHECK_NUMBER (from, 0);
1492 CHECK_NUMBER (count, 1);
1493
1494 return scan_lists (XINT (from), XINT (count), 0, 1);
1495 }
1496
1497 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
1498 0, 0, 0,
1499 "Move point backward over any number of chars with prefix syntax.\n\
1500 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1501 ()
1502 {
1503 int beg = BEGV;
1504 int pos = PT;
1505 int c;
1506 int temp_pos = pos;
1507
1508 DEC_POS (temp_pos);
1509
1510 while (pos > beg && !char_quoted (temp_pos)
1511 && ((c = FETCH_CHAR (temp_pos), SYNTAX (c) == Squote)
1512 || SYNTAX_PREFIX (c)))
1513 {
1514 pos = temp_pos;
1515 DEC_POS (temp_pos);
1516 }
1517
1518 SET_PT (pos);
1519
1520 return Qnil;
1521 }
1522 \f
1523 /* Parse forward from FROM to END,
1524 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1525 and return a description of the state of the parse at END.
1526 If STOPBEFORE is nonzero, stop at the start of an atom.
1527 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1528
1529 static void
1530 scan_sexps_forward (stateptr, from, end, targetdepth,
1531 stopbefore, oldstate, commentstop)
1532 struct lisp_parse_state *stateptr;
1533 register int from;
1534 int end, targetdepth, stopbefore;
1535 Lisp_Object oldstate;
1536 int commentstop;
1537 {
1538 struct lisp_parse_state state;
1539
1540 register enum syntaxcode code;
1541 struct level { int last, prev; };
1542 struct level levelstart[100];
1543 register struct level *curlevel = levelstart;
1544 struct level *endlevel = levelstart + 100;
1545 int prev;
1546 register int depth; /* Paren depth of current scanning location.
1547 level - levelstart equals this except
1548 when the depth becomes negative. */
1549 int mindepth; /* Lowest DEPTH value seen. */
1550 int start_quoted = 0; /* Nonzero means starting after a char quote */
1551 Lisp_Object tem;
1552 int prev_from; /* Keep one character before FROM. */
1553
1554 prev_from = from;
1555 DEC_POS (prev_from);
1556
1557 /* Use this macro instead of `from++'. */
1558 #define INC_FROM do { prev_from = from; INC_POS (from); } while (0)
1559
1560 immediate_quit = 1;
1561 QUIT;
1562
1563 if (NILP (oldstate))
1564 {
1565 depth = 0;
1566 state.instring = -1;
1567 state.incomment = 0;
1568 state.comstyle = 0; /* comment style a by default */
1569 }
1570 else
1571 {
1572 tem = Fcar (oldstate);
1573 if (!NILP (tem))
1574 depth = XINT (tem);
1575 else
1576 depth = 0;
1577
1578 oldstate = Fcdr (oldstate);
1579 oldstate = Fcdr (oldstate);
1580 oldstate = Fcdr (oldstate);
1581 tem = Fcar (oldstate);
1582 state.instring = !NILP (tem) ? XINT (tem) : -1;
1583
1584 oldstate = Fcdr (oldstate);
1585 tem = Fcar (oldstate);
1586 state.incomment = !NILP (tem);
1587
1588 oldstate = Fcdr (oldstate);
1589 tem = Fcar (oldstate);
1590 start_quoted = !NILP (tem);
1591
1592 /* if the eight element of the list is nil, we are in comment
1593 style a. if it is non-nil, we are in comment style b */
1594 oldstate = Fcdr (oldstate);
1595 oldstate = Fcdr (oldstate);
1596 tem = Fcar (oldstate);
1597 state.comstyle = !NILP (tem);
1598 }
1599 state.quoted = 0;
1600 mindepth = depth;
1601
1602 curlevel->prev = -1;
1603 curlevel->last = -1;
1604
1605 /* Enter the loop at a place appropriate for initial state. */
1606
1607 if (state.incomment) goto startincomment;
1608 if (state.instring >= 0)
1609 {
1610 if (start_quoted) goto startquotedinstring;
1611 goto startinstring;
1612 }
1613 if (start_quoted) goto startquoted;
1614
1615 while (from < end)
1616 {
1617 code = SYNTAX (FETCH_CHAR (from));
1618 INC_FROM;
1619 if (code == Scomment)
1620 state.comstart = prev_from;
1621
1622 else if (from < end && SYNTAX_COMSTART_FIRST (FETCH_CHAR (prev_from))
1623 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from)))
1624 {
1625 /* Record the comment style we have entered so that only
1626 the comment-end sequence of the same style actually
1627 terminates the comment section. */
1628 code = Scomment;
1629 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from));
1630 state.comstart = prev_from;
1631 INC_FROM;
1632 }
1633
1634 if (SYNTAX_PREFIX (FETCH_CHAR (prev_from)))
1635 continue;
1636 switch (SWITCH_ENUM_CAST (code))
1637 {
1638 case Sescape:
1639 case Scharquote:
1640 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1641 curlevel->last = prev_from;
1642 startquoted:
1643 if (from == end) goto endquoted;
1644 INC_FROM;
1645 goto symstarted;
1646 /* treat following character as a word constituent */
1647 case Sword:
1648 case Ssymbol:
1649 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1650 curlevel->last = prev_from;
1651 symstarted:
1652 while (from < end)
1653 {
1654 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from))))
1655 {
1656 case Scharquote:
1657 case Sescape:
1658 INC_FROM;
1659 if (from == end) goto endquoted;
1660 break;
1661 case Sword:
1662 case Ssymbol:
1663 case Squote:
1664 break;
1665 default:
1666 goto symdone;
1667 }
1668 INC_FROM;
1669 }
1670 symdone:
1671 curlevel->prev = curlevel->last;
1672 break;
1673
1674 startincomment:
1675 if (commentstop)
1676 goto done;
1677 if (from != BEGV)
1678 {
1679 /* Enter the loop in the middle so that we find
1680 a 2-char comment ender if we start in the middle of it. */
1681 prev = FETCH_CHAR (prev_from);
1682 goto startincomment_1;
1683 }
1684 /* At beginning of buffer, enter the loop the ordinary way. */
1685
1686 case Scomment:
1687 state.incomment = 1;
1688 if (commentstop)
1689 goto done;
1690 while (1)
1691 {
1692 if (from == end) goto done;
1693 prev = FETCH_CHAR (from);
1694 if (SYNTAX (prev) == Sendcomment
1695 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1696 /* Only terminate the comment section if the endcomment
1697 of the same style as the start sequence has been
1698 encountered. */
1699 break;
1700 INC_FROM;
1701 startincomment_1:
1702 if (from < end && SYNTAX_COMEND_FIRST (prev)
1703 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from))
1704 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle)
1705 /* Only terminate the comment section if the end-comment
1706 sequence of the same style as the start sequence has
1707 been encountered. */
1708 { INC_FROM; break; }
1709 }
1710 state.incomment = 0;
1711 state.comstyle = 0; /* reset the comment style */
1712 break;
1713
1714 case Sopen:
1715 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1716 depth++;
1717 /* curlevel++->last ran into compiler bug on Apollo */
1718 curlevel->last = prev_from;
1719 if (++curlevel == endlevel)
1720 error ("Nesting too deep for parser");
1721 curlevel->prev = -1;
1722 curlevel->last = -1;
1723 if (targetdepth == depth) goto done;
1724 break;
1725
1726 case Sclose:
1727 depth--;
1728 if (depth < mindepth)
1729 mindepth = depth;
1730 if (curlevel != levelstart)
1731 curlevel--;
1732 curlevel->prev = curlevel->last;
1733 if (targetdepth == depth) goto done;
1734 break;
1735
1736 case Sstring:
1737 if (stopbefore) goto stop; /* this arg means stop at sexp start */
1738 curlevel->last = prev_from;
1739 state.instring = FETCH_CHAR (prev_from);
1740 startinstring:
1741 while (1)
1742 {
1743 int c;
1744
1745 if (from >= end) goto done;
1746 c = FETCH_CHAR (from);
1747 if (c == state.instring) break;
1748 switch (SWITCH_ENUM_CAST (SYNTAX (c)))
1749 {
1750 case Scharquote:
1751 case Sescape:
1752 INC_FROM;
1753 startquotedinstring:
1754 if (from >= end) goto endquoted;
1755 }
1756 INC_FROM;
1757 }
1758 state.instring = -1;
1759 curlevel->prev = curlevel->last;
1760 INC_FROM;
1761 break;
1762
1763 case Smath:
1764 break;
1765 }
1766 }
1767 goto done;
1768
1769 stop: /* Here if stopping before start of sexp. */
1770 from = prev_from; /* We have just fetched the char that starts it; */
1771 goto done; /* but return the position before it. */
1772
1773 endquoted:
1774 state.quoted = 1;
1775 done:
1776 state.depth = depth;
1777 state.mindepth = mindepth;
1778 state.thislevelstart = curlevel->prev;
1779 state.prevlevelstart
1780 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
1781 state.location = from;
1782 immediate_quit = 0;
1783
1784 *stateptr = state;
1785 }
1786
1787 /* This comment supplies the doc string for parse-partial-sexp,
1788 for make-docfile to see. We cannot put this in the real DEFUN
1789 due to limits in the Unix cpp.
1790
1791 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1792 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1793 Parsing stops at TO or when certain criteria are met;\n\
1794 point is set to where parsing stops.\n\
1795 If fifth arg STATE is omitted or nil,\n\
1796 parsing assumes that FROM is the beginning of a function.\n\
1797 Value is a list of eight elements describing final state of parsing:\n\
1798 0. depth in parens.\n\
1799 1. character address of start of innermost containing list; nil if none.\n\
1800 2. character address of start of last complete sexp terminated.\n\
1801 3. non-nil if inside a string.\n\
1802 (it is the character that will terminate the string.)\n\
1803 4. t if inside a comment.\n\
1804 5. t if following a quote character.\n\
1805 6. the minimum paren-depth encountered during this scan.\n\
1806 7. t if in a comment of style `b'.\n\
1807 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1808 in parentheses becomes equal to TARGETDEPTH.\n\
1809 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1810 any character that starts a sexp.\n\
1811 Fifth arg STATE is an eight-list like what this function returns.\n\
1812 It is used to initialize the state of the parse. Its second and third
1813 elements are ignored.
1814 Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1815 (from, to, targetdepth, stopbefore, state, commentstop)
1816 */
1817
1818 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
1819 0 /* See immediately above */)
1820 (from, to, targetdepth, stopbefore, oldstate, commentstop)
1821 Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
1822 {
1823 struct lisp_parse_state state;
1824 int target;
1825
1826 if (!NILP (targetdepth))
1827 {
1828 CHECK_NUMBER (targetdepth, 3);
1829 target = XINT (targetdepth);
1830 }
1831 else
1832 target = -100000; /* We won't reach this depth */
1833
1834 validate_region (&from, &to);
1835 scan_sexps_forward (&state, XINT (from), XINT (to),
1836 target, !NILP (stopbefore), oldstate,
1837 !NILP (commentstop));
1838
1839 SET_PT (state.location);
1840
1841 return Fcons (make_number (state.depth),
1842 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
1843 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
1844 Fcons (state.instring >= 0 ? make_number (state.instring) : Qnil,
1845 Fcons (state.incomment ? Qt : Qnil,
1846 Fcons (state.quoted ? Qt : Qnil,
1847 Fcons (make_number (state.mindepth),
1848 Fcons (state.comstyle ? Qt : Qnil,
1849 Qnil))))))));
1850 }
1851 \f
1852 init_syntax_once ()
1853 {
1854 register int i, c;
1855 Lisp_Object temp;
1856
1857 /* This has to be done here, before we call Fmake_char_table. */
1858 Qsyntax_table = intern ("syntax-table");
1859 staticpro (&Qsyntax_table);
1860
1861 /* Intern this now in case it isn't already done.
1862 Setting this variable twice is harmless.
1863 But don't staticpro it here--that is done in alloc.c. */
1864 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1865
1866 /* Create objects which can be shared among syntax tables. */
1867 Vsyntax_code_object = Fmake_vector (13, Qnil);
1868 for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
1869 XVECTOR (Vsyntax_code_object)->contents[i]
1870 = Fcons (make_number (i), Qnil);
1871
1872 /* Now we are ready to set up this property, so we can
1873 create syntax tables. */
1874 Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
1875
1876 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
1877
1878 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
1879
1880 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
1881 for (i = 'a'; i <= 'z'; i++)
1882 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1883 for (i = 'A'; i <= 'Z'; i++)
1884 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1885 for (i = '0'; i <= '9'; i++)
1886 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
1887
1888 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
1889 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
1890
1891 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
1892 Fcons (make_number (Sopen), make_number (')')));
1893 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
1894 Fcons (make_number (Sclose), make_number ('(')));
1895 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
1896 Fcons (make_number (Sopen), make_number (']')));
1897 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
1898 Fcons (make_number (Sclose), make_number ('[')));
1899 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
1900 Fcons (make_number (Sopen), make_number ('}')));
1901 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
1902 Fcons (make_number (Sclose), make_number ('{')));
1903 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
1904 Fcons (make_number ((int) Sstring), Qnil));
1905 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
1906 Fcons (make_number ((int) Sescape), Qnil));
1907
1908 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
1909 for (i = 0; i < 10; i++)
1910 {
1911 c = "_-+*/&|<>="[i];
1912 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
1913 }
1914
1915 temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
1916 for (i = 0; i < 12; i++)
1917 {
1918 c = ".,;:?!#@~^'`"[i];
1919 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
1920 }
1921 }
1922
1923 syms_of_syntax ()
1924 {
1925 Qsyntax_table_p = intern ("syntax-table-p");
1926 staticpro (&Qsyntax_table_p);
1927
1928 staticpro (&Vsyntax_code_object);
1929
1930 Qscan_error = intern ("scan-error");
1931 staticpro (&Qscan_error);
1932 Fput (Qscan_error, Qerror_conditions,
1933 Fcons (Qerror, Qnil));
1934 Fput (Qscan_error, Qerror_message,
1935 build_string ("Scan error"));
1936
1937 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
1938 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1939
1940 words_include_escapes = 0;
1941 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
1942 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1943
1944 defsubr (&Ssyntax_table_p);
1945 defsubr (&Ssyntax_table);
1946 defsubr (&Sstandard_syntax_table);
1947 defsubr (&Scopy_syntax_table);
1948 defsubr (&Sset_syntax_table);
1949 defsubr (&Schar_syntax);
1950 defsubr (&Smatching_paren);
1951 defsubr (&Smodify_syntax_entry);
1952 defsubr (&Sdescribe_syntax);
1953
1954 defsubr (&Sforward_word);
1955
1956 defsubr (&Sforward_comment);
1957 defsubr (&Sscan_lists);
1958 defsubr (&Sscan_sexps);
1959 defsubr (&Sbackward_prefix_chars);
1960 defsubr (&Sparse_partial_sexp);
1961 }