1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
28 Lisp_Object Qsyntax_table_p
, Qsyntax_table
;
30 static void scan_sexps_forward ();
31 static int char_quoted ();
33 int words_include_escapes
;
35 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
36 if not compiled with GCC. No need to mark it, since it is used
37 only very temporarily. */
38 Lisp_Object syntax_temp
;
40 /* This is the internal form of the parse state used in parse-partial-sexp. */
42 struct lisp_parse_state
44 int depth
; /* Depth at end of parsing */
45 int instring
; /* -1 if not within string, else desired terminator. */
46 int incomment
; /* Nonzero if within a comment at end of parsing */
47 int comstyle
; /* comment style a=0, or b=1 */
48 int quoted
; /* Nonzero if just after an escape char at end of parsing */
49 int thislevelstart
; /* Char number of most recent start-of-expression at current level */
50 int prevlevelstart
; /* Char number of start of containing expression */
51 int location
; /* Char number at which parsing stopped. */
52 int mindepth
; /* Minimum depth seen while scanning. */
53 int comstart
; /* Position just after last comment starter. */
56 /* These variables are a cache for finding the start of a defun.
57 find_start_pos is the place for which the defun start was found.
58 find_start_value is the defun start position found for it.
59 find_start_buffer is the buffer it was found in.
60 find_start_begv is the BEGV value when it was found.
61 find_start_modiff is the value of MODIFF when it was found. */
63 static int find_start_pos
;
64 static int find_start_value
;
65 static struct buffer
*find_start_buffer
;
66 static int find_start_begv
;
67 static int find_start_modiff
;
69 /* Find a defun-start that is the last one before POS (or nearly the last).
70 We record what we find, so that another call in the same area
71 can return the same value right away. */
74 find_defun_start (pos
)
80 /* Use previous finding, if it's valid and applies to this inquiry. */
81 if (current_buffer
== find_start_buffer
82 /* Reuse the defun-start even if POS is a little farther on.
83 POS might be in the next defun, but that's ok.
84 Our value may not be the best possible, but will still be usable. */
85 && pos
<= find_start_pos
+ 1000
86 && pos
>= find_start_value
87 && BEGV
== find_start_begv
88 && MODIFF
== find_start_modiff
)
89 return find_start_value
;
91 /* Back up to start of line. */
92 tem
= scan_buffer ('\n', pos
, BEGV
, -1, &shortage
, 1);
96 /* Open-paren at start of line means we found our defun-start. */
97 if (SYNTAX (FETCH_CHAR (tem
)) == Sopen
)
99 /* Move to beg of previous line. */
100 tem
= scan_buffer ('\n', tem
, BEGV
, -2, &shortage
, 1);
103 /* Record what we found, for the next try. */
104 find_start_value
= tem
;
105 find_start_buffer
= current_buffer
;
106 find_start_modiff
= MODIFF
;
107 find_start_begv
= BEGV
;
108 find_start_pos
= pos
;
110 return find_start_value
;
113 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
114 "Return t if ARG is a syntax table.\n\
115 Currently, any char-table counts as a syntax table.")
119 if (CHAR_TABLE_P (obj
)
120 && XCHAR_TABLE (obj
)->purpose
== Qsyntax_table
)
126 check_syntax_table (obj
)
129 CHECK_CHAR_TABLE (obj
, 0);
133 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
134 "Return the current syntax table.\n\
135 This is the one specified by the current buffer.")
138 return current_buffer
->syntax_table
;
141 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
142 Sstandard_syntax_table
, 0, 0, 0,
143 "Return the standard syntax table.\n\
144 This is the one used for new buffers.")
147 return Vstandard_syntax_table
;
150 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
151 "Construct a new syntax table and return it.\n\
152 It is a copy of the TABLE, which defaults to the standard syntax table.")
159 check_syntax_table (table
);
161 table
= Vstandard_syntax_table
;
163 copy
= Fcopy_sequence (table
);
164 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
168 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
169 "Select a new syntax table for the current buffer.\n\
170 One argument, a syntax table.")
174 check_syntax_table (table
);
175 current_buffer
->syntax_table
= table
;
176 /* Indicate that this buffer now has a specified syntax table. */
177 current_buffer
->local_var_flags
178 |= XFASTINT (buffer_local_flags
.syntax_table
);
182 /* Convert a letter which signifies a syntax code
183 into the code it signifies.
184 This is used by modify-syntax-entry, and other things. */
186 unsigned char syntax_spec_code
[0400] =
187 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
188 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
189 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 (char) Swhitespace
, 0377, (char) Sstring
, 0377,
192 (char) Smath
, 0377, 0377, (char) Squote
,
193 (char) Sopen
, (char) Sclose
, 0377, 0377,
194 0377, (char) Swhitespace
, (char) Spunct
, (char) Scharquote
,
195 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
196 0377, 0377, 0377, 0377,
197 (char) Scomment
, 0377, (char) Sendcomment
, 0377,
198 (char) Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
199 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
200 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
201 0377, 0377, 0377, 0377, (char) Sescape
, 0377, 0377, (char) Ssymbol
,
202 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
203 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
208 /* Indexed by syntax code, give the letter that describes it. */
210 char syntax_code_spec
[14] =
212 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
215 /* Look up the value for CHARACTER in syntax table TABLE's parent
216 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
217 for CHARACTER. It's actually used only when not compiled with GCC. */
220 syntax_parent_lookup (table
, character
)
228 table
= XCHAR_TABLE (table
)->parent
;
232 value
= XCHAR_TABLE (table
)->contents
[character
];
238 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
239 "Return the syntax code of CHAR, described by a character.\n\
240 For example, if CHAR is a word constituent, the character `?w' is returned.\n\
241 The characters that correspond to various syntax codes\n\
242 are listed in the documentation of `modify-syntax-entry'.")
247 CHECK_NUMBER (ch
, 0);
248 char_int
= XINT (ch
);
249 return make_number (syntax_code_spec
[(int) SYNTAX (char_int
)]);
252 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
253 "Return the matching parenthesis of CHAR, or nil if none.")
258 CHECK_NUMBER (ch
, 0);
259 char_int
= XINT (ch
);
260 code
= SYNTAX (char_int
);
261 if (code
== Sopen
|| code
== Sclose
)
262 return make_number (SYNTAX_MATCH (char_int
));
266 /* This comment supplies the doc string for modify-syntax-entry,
267 for make-docfile to see. We cannot put this in the real DEFUN
268 due to limits in the Unix cpp.
270 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
271 "Set syntax for character CHAR according to string S.\n\
272 The syntax is changed only for table TABLE, which defaults to\n\
273 the current buffer's syntax table.\n\
274 The first character of S should be one of the following:\n\
275 Space or - whitespace syntax. w word constituent.\n\
276 _ symbol constituent. . punctuation.\n\
277 ( open-parenthesis. ) close-parenthesis.\n\
278 \" string quote. \\ escape.\n\
279 $ paired delimiter. ' expression quote or prefix operator.\n\
280 < comment starter. > comment ender.\n\
281 / character-quote. @ inherit from `standard-syntax-table'.\n\
283 Only single-character comment start and end sequences are represented thus.\n\
284 Two-character sequences are represented as described below.\n\
285 The second character of S is the matching parenthesis,\n\
286 used only if the first character is `(' or `)'.\n\
287 Any additional characters are flags.\n\
288 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
289 1 means C is the start of a two-char comment start sequence.\n\
290 2 means C is the second character of such a sequence.\n\
291 3 means C is the start of a two-char comment end sequence.\n\
292 4 means C is the second character of such a sequence.\n\
294 There can be up to two orthogonal comment sequences. This is to support\n\
295 language modes such as C++. By default, all comment sequences are of style\n\
296 a, but you can set the comment sequence style to b (on the second character\n\
297 of a comment-start, or the first character of a comment-end sequence) using\n\
299 b means C is part of comment sequence b.\n\
301 p means C is a prefix character for `backward-prefix-chars';\n\
302 such characters are treated as whitespace when they occur\n\
303 between expressions.")
307 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
308 /* I really don't know why this is interactive
309 help-form should at least be made useful whilst reading the second arg
311 "cSet syntax for character: \nsSet syntax for %s to: ",
312 0 /* See immediately above */)
313 (c
, newentry
, syntax_table
)
314 Lisp_Object c
, newentry
, syntax_table
;
316 register unsigned char *p
;
317 register enum syntaxcode code
;
322 CHECK_STRING (newentry
, 1);
324 if (NILP (syntax_table
))
325 syntax_table
= current_buffer
->syntax_table
;
327 check_syntax_table (syntax_table
);
329 p
= XSTRING (newentry
)->data
;
330 code
= (enum syntaxcode
) syntax_spec_code
[*p
++];
331 if (((int) code
& 0377) == 0377)
332 error ("invalid syntax description letter: %c", c
);
334 if (code
== Sinherit
)
336 SET_RAW_SYNTAX_ENTRY (syntax_table
, c
, Qnil
);
341 XSETINT (match
, *p
++);
342 if (XFASTINT (match
) == ' ')
374 SET_RAW_SYNTAX_ENTRY (syntax_table
, c
,
375 Fcons (make_number (val
), match
));
380 /* Dump syntax table to buffer in human-readable format */
383 describe_syntax (value
)
386 register enum syntaxcode code
;
387 char desc
, match
, start1
, start2
, end1
, end2
, prefix
, comstyle
;
389 Lisp_Object first
, match_lisp
;
391 Findent_to (make_number (16), make_number (1));
395 insert_string ("inherit");
401 insert_string ("invalid");
405 first
= XCONS (value
)->car
;
406 match_lisp
= XCONS (value
)->cdr
;
408 if (!INTEGERP (first
) || !(NILP (match_lisp
) || INTEGERP (match_lisp
)))
410 insert_string ("invalid");
414 code
= (enum syntaxcode
) (first
& 0377);
415 start1
= (XINT (first
) >> 16) & 1;
416 start2
= (XINT (first
) >> 17) & 1;
417 end1
= (XINT (first
) >> 18) & 1;
418 end2
= (XINT (first
) >> 19) & 1;
419 prefix
= (XINT (first
) >> 20) & 1;
420 comstyle
= (XINT (first
) >> 21) & 1;
422 if ((int) code
< 0 || (int) code
>= (int) Smax
)
424 insert_string ("invalid");
427 desc
= syntax_code_spec
[(int) code
];
429 str
[0] = desc
, str
[1] = 0;
432 str
[0] = !NILP (match_lisp
) ? XINT (match_lisp
) : ' ';
450 insert_string ("\twhich means: ");
452 switch (SWITCH_ENUM_CAST (code
))
455 insert_string ("whitespace"); break;
457 insert_string ("punctuation"); break;
459 insert_string ("word"); break;
461 insert_string ("symbol"); break;
463 insert_string ("open"); break;
465 insert_string ("close"); break;
467 insert_string ("quote"); break;
469 insert_string ("string"); break;
471 insert_string ("math"); break;
473 insert_string ("escape"); break;
475 insert_string ("charquote"); break;
477 insert_string ("comment"); break;
479 insert_string ("endcomment"); break;
481 insert_string ("invalid");
485 if (!NILP (match_lisp
))
487 insert_string (", matches ");
488 insert_char (XINT (match_lisp
));
492 insert_string (",\n\t is the first character of a comment-start sequence");
494 insert_string (",\n\t is the second character of a comment-start sequence");
497 insert_string (",\n\t is the first character of a comment-end sequence");
499 insert_string (",\n\t is the second character of a comment-end sequence");
501 insert_string (" (comment style b)");
504 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
506 insert_string ("\n");
510 describe_syntax_1 (vector
)
513 struct buffer
*old
= current_buffer
;
514 set_buffer_internal (XBUFFER (Vstandard_output
));
515 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
, Qnil
);
516 call0 (intern ("help-mode"));
517 set_buffer_internal (old
);
521 DEFUN ("describe-syntax", Fdescribe_syntax
, Sdescribe_syntax
, 0, 0, "",
522 "Describe the syntax specifications in the syntax table.\n\
523 The descriptions are inserted in a buffer, which is then displayed.")
526 internal_with_output_to_temp_buffer
527 ("*Help*", describe_syntax_1
, current_buffer
->syntax_table
);
532 /* Return the position across COUNT words from FROM.
533 If that many words cannot be found before the end of the buffer, return 0.
534 COUNT negative means scan backward and stop at word beginning. */
536 scan_words (from
, count
)
537 register int from
, count
;
539 register int beg
= BEGV
;
540 register int end
= ZV
;
556 charcode
= FETCH_CHAR (from
);
557 code
= SYNTAX (charcode
);
558 if (words_include_escapes
559 && (code
== Sescape
|| code
== Scharquote
))
567 if (from
== end
) break;
568 charcode
= FETCH_CHAR (from
);
569 code
= SYNTAX (charcode
);
570 if (!(words_include_escapes
571 && (code
== Sescape
|| code
== Scharquote
)))
587 charcode
= FETCH_CHAR (from
- 1);
588 code
= SYNTAX (charcode
);
589 if (words_include_escapes
590 && (code
== Sescape
|| code
== Scharquote
))
598 if (from
== beg
) break;
599 charcode
= FETCH_CHAR (from
- 1);
600 code
= SYNTAX (charcode
);
601 if (!(words_include_escapes
602 && (code
== Sescape
|| code
== Scharquote
)))
615 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 1, 1, "p",
616 "Move point forward ARG words (backward if ARG is negative).\n\
617 Normally returns t.\n\
618 If an edge of the buffer is reached, point is left there\n\
619 and nil is returned.")
624 CHECK_NUMBER (count
, 0);
626 if (!(val
= scan_words (point
, XINT (count
))))
628 SET_PT (XINT (count
) > 0 ? ZV
: BEGV
);
635 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
636 "Move forward across up to N comments. If N is negative, move backward.\n\
637 Stop scanning if we find something other than a comment or whitespace.\n\
638 Set point to where scanning stops.\n\
639 If N comments are found as expected, with nothing except whitespace\n\
640 between them, return t; otherwise return nil.")
647 register enum syntaxcode code
;
648 int comstyle
= 0; /* style of comment encountered */
652 CHECK_NUMBER (count
, 0);
653 count1
= XINT (count
);
671 c
= FETCH_CHAR (from
);
675 if (from
< stop
&& SYNTAX_COMSTART_FIRST (c
)
676 && (c1
= FETCH_CHAR (from
),
677 SYNTAX_COMSTART_SECOND (c1
)))
679 /* We have encountered a comment start sequence and we
680 are ignoring all text inside comments. We must record
681 the comment style this sequence begins so that later,
682 only a comment end of the same style actually ends
683 the comment section. */
685 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
689 while (code
== Swhitespace
|| code
== Sendcomment
);
690 if (code
!= Scomment
)
696 /* We're at the start of a comment. */
705 c
= FETCH_CHAR (from
);
707 if (SYNTAX (c
) == Sendcomment
708 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
709 /* we have encountered a comment end of the same style
710 as the comment sequence which began this comment
713 if (from
< stop
&& SYNTAX_COMEND_FIRST (c
)
714 && (c1
= FETCH_CHAR (from
),
715 SYNTAX_COMEND_SECOND (c1
))
716 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
717 /* we have encountered a comment end of the same style
718 as the comment sequence which began this comment
722 /* We have skipped one comment. */
734 quoted
= char_quoted (from
);
737 c
= FETCH_CHAR (from
);
740 if (code
== Sendcomment
)
741 comstyle
= SYNTAX_COMMENT_STYLE (c
);
742 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
743 && (c1
= FETCH_CHAR (from
- 1),
744 SYNTAX_COMEND_FIRST (c1
))
745 && !char_quoted (from
- 1))
747 /* We must record the comment style encountered so that
748 later, we can match only the proper comment begin
749 sequence of the same style. */
751 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
755 if (code
== Sendcomment
&& !quoted
)
758 if (code
!= SYNTAX (c
))
759 /* For a two-char comment ender, we can assume
760 it does end a comment. So scan back in a simple way. */
762 if (from
!= stop
) from
--;
765 if ((c
= FETCH_CHAR (from
),
766 SYNTAX (c
) == Scomment
)
767 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
776 if (SYNTAX_COMSTART_SECOND (c
)
777 && (c1
= FETCH_CHAR (from
),
778 SYNTAX_COMSTART_FIRST (c1
))
779 && SYNTAX_COMMENT_STYLE (c
) == comstyle
780 && !char_quoted (from
))
787 /* Look back, counting the parity of string-quotes,
788 and recording the comment-starters seen.
789 When we reach a safe place, assume that's not in a string;
790 then step the main scan to the earliest comment-starter seen
791 an even number of string quotes away from the safe place.
793 OFROM[I] is position of the earliest comment-starter seen
794 which is I+2X quotes from the comment-end.
795 PARITY is current parity of quotes from the comment end. */
798 char my_stringend
= 0;
799 int string_lossage
= 0;
800 int comment_end
= from
;
801 int comstart_pos
= 0;
802 int comstart_parity
= 0;
803 int scanstart
= from
- 1;
805 /* At beginning of range to scan, we're outside of strings;
806 that determines quote parity to the comment-end. */
809 /* Move back and examine a character. */
812 c
= FETCH_CHAR (from
);
815 /* If this char is the second of a 2-char comment sequence,
816 back up and give the pair the appropriate syntax. */
817 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
818 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1)))
822 c
= FETCH_CHAR (from
);
825 /* If this char starts a 2-char comment start sequence,
826 treat it like a 1-char comment starter. */
827 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
)
828 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
+ 1))
829 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
+ 1)))
832 /* Ignore escaped characters. */
833 if (char_quoted (from
))
836 /* Track parity of quotes. */
840 if (my_stringend
== 0)
842 /* If we have two kinds of string delimiters.
843 There's no way to grok this scanning backwards. */
844 else if (my_stringend
!= c
)
848 /* Record comment-starters according to that
849 quote-parity to the comment-end. */
850 if (code
== Scomment
)
852 comstart_parity
= parity
;
856 /* If we find another earlier comment-ender,
857 any comment-starts earlier than that don't count
858 (because they go with the earlier comment-ender). */
859 if (code
== Sendcomment
860 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
)) == comstyle
)
863 /* Assume a defun-start point is outside of strings. */
865 && (from
== stop
|| FETCH_CHAR (from
- 1) == '\n'))
869 if (comstart_pos
== 0)
871 /* If the earliest comment starter
872 is followed by uniform paired string quotes or none,
873 we know it can't be inside a string
874 since if it were then the comment ender would be inside one.
875 So it does start a comment. Skip back to it. */
876 else if (comstart_parity
== 0 && !string_lossage
)
880 /* We had two kinds of string delimiters mixed up
881 together. Decode this going forwards.
882 Scan fwd from the previous comment ender
883 to the one in question; this records where we
884 last passed a comment starter. */
885 struct lisp_parse_state state
;
886 scan_sexps_forward (&state
, find_defun_start (comment_end
),
887 comment_end
- 1, -10000, 0, Qnil
, 0);
889 from
= state
.comstart
;
891 /* We can't grok this as a comment; scan it normally. */
895 /* We have skipped one comment. */
898 else if ((code
!= Swhitespace
&& code
!= Scomment
) || quoted
)
914 int parse_sexp_ignore_comments
;
917 scan_lists (from
, count
, depth
, sexpflag
)
919 int count
, depth
, sexpflag
;
924 unsigned char stringterm
;
927 register enum syntaxcode code
;
928 int min_depth
= depth
; /* Err out if depth gets less than this. */
929 int comstyle
= 0; /* style of comment encountered */
931 if (depth
> 0) min_depth
= 0;
941 c
= FETCH_CHAR (from
);
944 if (from
< stop
&& SYNTAX_COMSTART_FIRST (c
)
945 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
))
946 && parse_sexp_ignore_comments
)
948 /* we have encountered a comment start sequence and we
949 are ignoring all text inside comments. we must record
950 the comment style this sequence begins so that later,
951 only a comment end of the same style actually ends
952 the comment section */
954 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
958 if (SYNTAX_PREFIX (c
))
961 switch (SWITCH_ENUM_CAST (code
))
965 if (from
== stop
) goto lose
;
967 /* treat following character as a word constituent */
970 if (depth
|| !sexpflag
) break;
971 /* This word counts as a sexp; return at end of it. */
974 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
979 if (from
== stop
) goto lose
;
993 if (!parse_sexp_ignore_comments
) break;
1002 c
= FETCH_CHAR (from
);
1003 if (SYNTAX (c
) == Sendcomment
1004 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1005 /* we have encountered a comment end of the same style
1006 as the comment sequence which began this comment
1010 if (from
< stop
&& SYNTAX_COMEND_FIRST (c
)
1011 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
1012 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1013 /* we have encountered a comment end of the same style
1014 as the comment sequence which began this comment
1023 if (from
!= stop
&& c
== FETCH_CHAR (from
))
1033 if (!++depth
) goto done
;
1038 if (!--depth
) goto done
;
1039 if (depth
< min_depth
)
1040 error ("Containing expression ends prematurely");
1044 stringterm
= FETCH_CHAR (from
- 1);
1047 if (from
>= stop
) goto lose
;
1048 if (FETCH_CHAR (from
) == stringterm
) break;
1049 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1058 if (!depth
&& sexpflag
) goto done
;
1063 /* Reached end of buffer. Error if within object, return nil if between */
1064 if (depth
) goto lose
;
1069 /* End of object reached */
1080 if (quoted
= char_quoted (from
))
1082 c
= FETCH_CHAR (from
);
1085 if (code
== Sendcomment
)
1086 comstyle
= SYNTAX_COMMENT_STYLE (c
);
1087 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1088 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1))
1089 && !char_quoted (from
- 1)
1090 && parse_sexp_ignore_comments
)
1092 /* we must record the comment style encountered so that
1093 later, we can match only the proper comment begin
1094 sequence of the same style */
1096 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
- 1));
1100 if (SYNTAX_PREFIX (c
))
1103 switch (SWITCH_ENUM_CAST (quoted
? Sword
: code
))
1107 if (depth
|| !sexpflag
) break;
1108 /* This word counts as a sexp; count object finished after passing it. */
1111 quoted
= char_quoted (from
- 1);
1114 if (! (quoted
|| SYNTAX (FETCH_CHAR (from
- 1)) == Sword
1115 || SYNTAX (FETCH_CHAR (from
- 1)) == Ssymbol
1116 || SYNTAX (FETCH_CHAR (from
- 1)) == Squote
))
1125 if (from
!= stop
&& c
== FETCH_CHAR (from
- 1))
1135 if (!++depth
) goto done2
;
1140 if (!--depth
) goto done2
;
1141 if (depth
< min_depth
)
1142 error ("Containing expression ends prematurely");
1146 if (!parse_sexp_ignore_comments
)
1149 if (code
!= SYNTAX (c
))
1150 /* For a two-char comment ender, we can assume
1151 it does end a comment. So scan back in a simple way. */
1153 if (from
!= stop
) from
--;
1156 if (SYNTAX (c
= FETCH_CHAR (from
)) == Scomment
1157 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1166 if (SYNTAX_COMSTART_SECOND (c
)
1167 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
))
1168 && SYNTAX_COMMENT_STYLE (c
) == comstyle
1169 && !char_quoted (from
))
1176 /* Look back, counting the parity of string-quotes,
1177 and recording the comment-starters seen.
1178 When we reach a safe place, assume that's not in a string;
1179 then step the main scan to the earliest comment-starter seen
1180 an even number of string quotes away from the safe place.
1182 OFROM[I] is position of the earliest comment-starter seen
1183 which is I+2X quotes from the comment-end.
1184 PARITY is current parity of quotes from the comment end. */
1187 char my_stringend
= 0;
1188 int string_lossage
= 0;
1189 int comment_end
= from
;
1190 int comstart_pos
= 0;
1191 int comstart_parity
= 0;
1192 int scanstart
= from
- 1;
1194 /* At beginning of range to scan, we're outside of strings;
1195 that determines quote parity to the comment-end. */
1196 while (from
!= stop
)
1198 /* Move back and examine a character. */
1201 c
= FETCH_CHAR (from
);
1204 /* If this char is the second of a 2-char comment sequence,
1205 back up and give the pair the appropriate syntax. */
1206 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1207 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1)))
1211 c
= FETCH_CHAR (from
);
1214 /* If this char starts a 2-char comment start sequence,
1215 treat it like a 1-char comment starter. */
1216 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
)
1217 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
+ 1))
1218 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
+ 1)))
1221 /* Ignore escaped characters. */
1222 if (char_quoted (from
))
1225 /* Track parity of quotes. */
1226 if (code
== Sstring
)
1229 if (my_stringend
== 0)
1231 /* If we have two kinds of string delimiters.
1232 There's no way to grok this scanning backwards. */
1233 else if (my_stringend
!= c
)
1237 /* Record comment-starters according to that
1238 quote-parity to the comment-end. */
1239 if (code
== Scomment
)
1241 comstart_parity
= parity
;
1242 comstart_pos
= from
;
1245 /* If we find another earlier comment-ender,
1246 any comment-starts earlier than that don't count
1247 (because they go with the earlier comment-ender). */
1248 if (code
== Sendcomment
1249 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
)) == comstyle
)
1252 /* Assume a defun-start point is outside of strings. */
1254 && (from
== stop
|| FETCH_CHAR (from
- 1) == '\n'))
1258 if (comstart_pos
== 0)
1260 /* If the earliest comment starter
1261 is followed by uniform paired string quotes or none,
1262 we know it can't be inside a string
1263 since if it were then the comment ender would be inside one.
1264 So it does start a comment. Skip back to it. */
1265 else if (comstart_parity
== 0 && !string_lossage
)
1266 from
= comstart_pos
;
1269 /* We had two kinds of string delimiters mixed up
1270 together. Decode this going forwards.
1271 Scan fwd from the previous comment ender
1272 to the one in question; this records where we
1273 last passed a comment starter. */
1274 struct lisp_parse_state state
;
1275 scan_sexps_forward (&state
, find_defun_start (comment_end
),
1276 comment_end
- 1, -10000, 0, Qnil
, 0);
1277 if (state
.incomment
)
1278 from
= state
.comstart
;
1280 /* We can't grok this as a comment; scan it normally. */
1287 stringterm
= FETCH_CHAR (from
);
1290 if (from
== stop
) goto lose
;
1291 if (!char_quoted (from
- 1)
1292 && stringterm
== FETCH_CHAR (from
- 1))
1297 if (!depth
&& sexpflag
) goto done2
;
1302 /* Reached start of buffer. Error if within object, return nil if between */
1303 if (depth
) goto lose
;
1314 XSETFASTINT (val
, from
);
1318 error ("Unbalanced parentheses");
1326 register enum syntaxcode code
;
1327 register int beg
= BEGV
;
1328 register int quoted
= 0;
1331 && ((code
= SYNTAX (FETCH_CHAR (pos
- 1))) == Scharquote
1332 || code
== Sescape
))
1333 pos
--, quoted
= !quoted
;
1337 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
1338 "Scan from character number FROM by COUNT lists.\n\
1339 Returns the character number of the position thus found.\n\
1341 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1342 only places where the depth in parentheses becomes zero\n\
1343 are candidates for stopping; COUNT such places are counted.\n\
1344 Thus, a positive value for DEPTH means go out levels.\n\
1346 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1348 If the beginning or end of (the accessible part of) the buffer is reached\n\
1349 and the depth is wrong, an error is signaled.\n\
1350 If the depth is right but the count is not used up, nil is returned.")
1351 (from
, count
, depth
)
1352 Lisp_Object from
, count
, depth
;
1354 CHECK_NUMBER (from
, 0);
1355 CHECK_NUMBER (count
, 1);
1356 CHECK_NUMBER (depth
, 2);
1358 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
1361 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
1362 "Scan from character number FROM by COUNT balanced expressions.\n\
1363 If COUNT is negative, scan backwards.\n\
1364 Returns the character number of the position thus found.\n\
1366 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1368 If the beginning or end of (the accessible part of) the buffer is reached\n\
1369 in the middle of a parenthetical grouping, an error is signaled.\n\
1370 If the beginning or end is reached between groupings\n\
1371 but before count is used up, nil is returned.")
1373 Lisp_Object from
, count
;
1375 CHECK_NUMBER (from
, 0);
1376 CHECK_NUMBER (count
, 1);
1378 return scan_lists (XINT (from
), XINT (count
), 0, 1);
1381 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
1383 "Move point backward over any number of chars with prefix syntax.\n\
1384 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1390 while (pos
> beg
&& !char_quoted (pos
- 1)
1391 && (SYNTAX (FETCH_CHAR (pos
- 1)) == Squote
1392 || SYNTAX_PREFIX (FETCH_CHAR (pos
- 1))))
1400 /* Parse forward from FROM to END,
1401 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1402 and return a description of the state of the parse at END.
1403 If STOPBEFORE is nonzero, stop at the start of an atom.
1404 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1407 scan_sexps_forward (stateptr
, from
, end
, targetdepth
,
1408 stopbefore
, oldstate
, commentstop
)
1409 struct lisp_parse_state
*stateptr
;
1411 int end
, targetdepth
, stopbefore
;
1412 Lisp_Object oldstate
;
1415 struct lisp_parse_state state
;
1417 register enum syntaxcode code
;
1418 struct level
{ int last
, prev
; };
1419 struct level levelstart
[100];
1420 register struct level
*curlevel
= levelstart
;
1421 struct level
*endlevel
= levelstart
+ 100;
1423 register int depth
; /* Paren depth of current scanning location.
1424 level - levelstart equals this except
1425 when the depth becomes negative. */
1426 int mindepth
; /* Lowest DEPTH value seen. */
1427 int start_quoted
= 0; /* Nonzero means starting after a char quote */
1433 if (NILP (oldstate
))
1436 state
.instring
= -1;
1437 state
.incomment
= 0;
1438 state
.comstyle
= 0; /* comment style a by default */
1442 tem
= Fcar (oldstate
);
1448 oldstate
= Fcdr (oldstate
);
1449 oldstate
= Fcdr (oldstate
);
1450 oldstate
= Fcdr (oldstate
);
1451 tem
= Fcar (oldstate
);
1452 state
.instring
= !NILP (tem
) ? XINT (tem
) : -1;
1454 oldstate
= Fcdr (oldstate
);
1455 tem
= Fcar (oldstate
);
1456 state
.incomment
= !NILP (tem
);
1458 oldstate
= Fcdr (oldstate
);
1459 tem
= Fcar (oldstate
);
1460 start_quoted
= !NILP (tem
);
1462 /* if the eight element of the list is nil, we are in comment
1463 style a. if it is non-nil, we are in comment style b */
1464 oldstate
= Fcdr (oldstate
);
1465 oldstate
= Fcdr (oldstate
);
1466 tem
= Fcar (oldstate
);
1467 state
.comstyle
= !NILP (tem
);
1472 curlevel
->prev
= -1;
1473 curlevel
->last
= -1;
1475 /* Enter the loop at a place appropriate for initial state. */
1477 if (state
.incomment
) goto startincomment
;
1478 if (state
.instring
>= 0)
1480 if (start_quoted
) goto startquotedinstring
;
1483 if (start_quoted
) goto startquoted
;
1487 code
= SYNTAX (FETCH_CHAR (from
));
1489 if (code
== Scomment
)
1490 state
.comstart
= from
-1;
1492 else if (from
< end
&& SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
- 1))
1493 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
)))
1495 /* Record the comment style we have entered so that only
1496 the comment-end sequence of the same style actually
1497 terminates the comment section. */
1499 state
.comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
1500 state
.comstart
= from
-1;
1504 if (SYNTAX_PREFIX (FETCH_CHAR (from
- 1)))
1506 switch (SWITCH_ENUM_CAST (code
))
1510 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1511 curlevel
->last
= from
- 1;
1513 if (from
== end
) goto endquoted
;
1516 /* treat following character as a word constituent */
1519 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1520 curlevel
->last
= from
- 1;
1524 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1529 if (from
== end
) goto endquoted
;
1541 curlevel
->prev
= curlevel
->last
;
1549 /* Enter the loop in the middle so that we find
1550 a 2-char comment ender if we start in the middle of it. */
1551 prev
= FETCH_CHAR (from
- 1);
1552 goto startincomment_1
;
1554 /* At beginning of buffer, enter the loop the ordinary way. */
1557 state
.incomment
= 1;
1562 if (from
== end
) goto done
;
1563 prev
= FETCH_CHAR (from
);
1564 if (SYNTAX (prev
) == Sendcomment
1565 && SYNTAX_COMMENT_STYLE (prev
) == state
.comstyle
)
1566 /* Only terminate the comment section if the endcomment
1567 of the same style as the start sequence has been
1572 if (from
< end
&& SYNTAX_COMEND_FIRST (prev
)
1573 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
1574 && SYNTAX_COMMENT_STYLE (prev
) == state
.comstyle
)
1575 /* Only terminate the comment section if the end-comment
1576 sequence of the same style as the start sequence has
1577 been encountered. */
1580 state
.incomment
= 0;
1581 state
.comstyle
= 0; /* reset the comment style */
1585 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1587 /* curlevel++->last ran into compiler bug on Apollo */
1588 curlevel
->last
= from
- 1;
1589 if (++curlevel
== endlevel
)
1590 error ("Nesting too deep for parser");
1591 curlevel
->prev
= -1;
1592 curlevel
->last
= -1;
1593 if (targetdepth
== depth
) goto done
;
1598 if (depth
< mindepth
)
1600 if (curlevel
!= levelstart
)
1602 curlevel
->prev
= curlevel
->last
;
1603 if (targetdepth
== depth
) goto done
;
1607 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1608 curlevel
->last
= from
- 1;
1609 state
.instring
= FETCH_CHAR (from
- 1);
1613 if (from
>= end
) goto done
;
1614 if (FETCH_CHAR (from
) == state
.instring
) break;
1615 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1620 startquotedinstring
:
1621 if (from
>= end
) goto endquoted
;
1625 state
.instring
= -1;
1626 curlevel
->prev
= curlevel
->last
;
1636 stop
: /* Here if stopping before start of sexp. */
1637 from
--; /* We have just fetched the char that starts it; */
1638 goto done
; /* but return the position before it. */
1643 state
.depth
= depth
;
1644 state
.mindepth
= mindepth
;
1645 state
.thislevelstart
= curlevel
->prev
;
1646 state
.prevlevelstart
1647 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
1648 state
.location
= from
;
1654 /* This comment supplies the doc string for parse-partial-sexp,
1655 for make-docfile to see. We cannot put this in the real DEFUN
1656 due to limits in the Unix cpp.
1658 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1659 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1660 Parsing stops at TO or when certain criteria are met;\n\
1661 point is set to where parsing stops.\n\
1662 If fifth arg STATE is omitted or nil,\n\
1663 parsing assumes that FROM is the beginning of a function.\n\
1664 Value is a list of eight elements describing final state of parsing:\n\
1665 0. depth in parens.\n\
1666 1. character address of start of innermost containing list; nil if none.\n\
1667 2. character address of start of last complete sexp terminated.\n\
1668 3. non-nil if inside a string.\n\
1669 (it is the character that will terminate the string.)\n\
1670 4. t if inside a comment.\n\
1671 5. t if following a quote character.\n\
1672 6. the minimum paren-depth encountered during this scan.\n\
1673 7. t if in a comment of style `b'.\n\
1674 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1675 in parentheses becomes equal to TARGETDEPTH.\n\
1676 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1677 any character that starts a sexp.\n\
1678 Fifth arg STATE is an eight-list like what this function returns.\n\
1679 It is used to initialize the state of the parse. Its second and third
1680 elements are ignored.
1681 Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1682 (from, to, targetdepth, stopbefore, state, commentstop)
1685 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
1686 0 /* See immediately above */)
1687 (from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
)
1688 Lisp_Object from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
;
1690 struct lisp_parse_state state
;
1693 if (!NILP (targetdepth
))
1695 CHECK_NUMBER (targetdepth
, 3);
1696 target
= XINT (targetdepth
);
1699 target
= -100000; /* We won't reach this depth */
1701 validate_region (&from
, &to
);
1702 scan_sexps_forward (&state
, XINT (from
), XINT (to
),
1703 target
, !NILP (stopbefore
), oldstate
,
1704 !NILP (commentstop
));
1706 SET_PT (state
.location
);
1708 return Fcons (make_number (state
.depth
),
1709 Fcons (state
.prevlevelstart
< 0 ? Qnil
: make_number (state
.prevlevelstart
),
1710 Fcons (state
.thislevelstart
< 0 ? Qnil
: make_number (state
.thislevelstart
),
1711 Fcons (state
.instring
>= 0 ? make_number (state
.instring
) : Qnil
,
1712 Fcons (state
.incomment
? Qt
: Qnil
,
1713 Fcons (state
.quoted
? Qt
: Qnil
,
1714 Fcons (make_number (state
.mindepth
),
1715 Fcons (state
.comstyle
? Qt
: Qnil
,
1724 /* This has to be done here, before we call Fmake_char_table. */
1725 Qsyntax_table
= intern ("syntax-table");
1726 staticpro (&Qsyntax_table
);
1728 /* Intern this now in case it isn't already done.
1729 Setting this variable twice is harmless.
1730 But don't staticpro it here--that is done in alloc.c. */
1731 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1733 /* Now we are ready to set up this property, so we can
1734 create syntax tables. */
1735 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
1737 temp
= Fcons (make_number ((int) Swhitespace
), Qnil
);
1739 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
1741 temp
= Fcons (make_number ((int) Sword
), Qnil
);
1742 for (i
= 'a'; i
<= 'z'; i
++)
1743 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
1744 for (i
= 'A'; i
<= 'Z'; i
++)
1745 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
1746 for (i
= '0'; i
<= '9'; i
++)
1747 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
1749 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
1750 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
1752 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
1753 Fcons (make_number (Sopen
), make_number (')')));
1754 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
1755 Fcons (make_number (Sclose
), make_number ('(')));
1756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
1757 Fcons (make_number (Sopen
), make_number (']')));
1758 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
1759 Fcons (make_number (Sclose
), make_number ('[')));
1760 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
1761 Fcons (make_number (Sopen
), make_number ('}')));
1762 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
1763 Fcons (make_number (Sclose
), make_number ('{')));
1764 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
1765 Fcons (make_number ((int) Sstring
), Qnil
));
1766 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
1767 Fcons (make_number ((int) Sescape
), Qnil
));
1769 temp
= Fcons (make_number ((int) Ssymbol
), Qnil
);
1770 for (i
= 0; i
< 10; i
++)
1771 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, "_-+*/&|<>="[i
], temp
);
1773 temp
= Fcons (make_number ((int) Spunct
), Qnil
);
1774 for (i
= 0; i
< 12; i
++)
1775 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ".,;:?!#@~^'`"[i
], temp
);
1780 Qsyntax_table_p
= intern ("syntax-table-p");
1781 staticpro (&Qsyntax_table_p
);
1783 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments
,
1784 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1786 words_include_escapes
= 0;
1787 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes
,
1788 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1790 defsubr (&Ssyntax_table_p
);
1791 defsubr (&Ssyntax_table
);
1792 defsubr (&Sstandard_syntax_table
);
1793 defsubr (&Scopy_syntax_table
);
1794 defsubr (&Sset_syntax_table
);
1795 defsubr (&Schar_syntax
);
1796 defsubr (&Smatching_paren
);
1797 defsubr (&Smodify_syntax_entry
);
1798 defsubr (&Sdescribe_syntax
);
1800 defsubr (&Sforward_word
);
1802 defsubr (&Sforward_comment
);
1803 defsubr (&Sscan_lists
);
1804 defsubr (&Sscan_sexps
);
1805 defsubr (&Sbackward_prefix_chars
);
1806 defsubr (&Sparse_partial_sexp
);