1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993, 1994, 1995 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
28 Lisp_Object Qsyntax_table_p
, Qsyntax_table
;
30 static void scan_sexps_forward ();
31 static int char_quoted ();
33 int words_include_escapes
;
35 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
36 if not compiled with GCC. No need to mark it, since it is used
37 only very temporarily. */
38 Lisp_Object syntax_temp
;
40 /* This is the internal form of the parse state used in parse-partial-sexp. */
42 struct lisp_parse_state
44 int depth
; /* Depth at end of parsing */
45 int instring
; /* -1 if not within string, else desired terminator. */
46 int incomment
; /* Nonzero if within a comment at end of parsing */
47 int comstyle
; /* comment style a=0, or b=1 */
48 int quoted
; /* Nonzero if just after an escape char at end of parsing */
49 int thislevelstart
; /* Char number of most recent start-of-expression at current level */
50 int prevlevelstart
; /* Char number of start of containing expression */
51 int location
; /* Char number at which parsing stopped. */
52 int mindepth
; /* Minimum depth seen while scanning. */
53 int comstart
; /* Position just after last comment starter. */
56 /* These variables are a cache for finding the start of a defun.
57 find_start_pos is the place for which the defun start was found.
58 find_start_value is the defun start position found for it.
59 find_start_buffer is the buffer it was found in.
60 find_start_begv is the BEGV value when it was found.
61 find_start_modiff is the value of MODIFF when it was found. */
63 static int find_start_pos
;
64 static int find_start_value
;
65 static struct buffer
*find_start_buffer
;
66 static int find_start_begv
;
67 static int find_start_modiff
;
69 /* Find a defun-start that is the last one before POS (or nearly the last).
70 We record what we find, so that another call in the same area
71 can return the same value right away. */
74 find_defun_start (pos
)
80 /* Use previous finding, if it's valid and applies to this inquiry. */
81 if (current_buffer
== find_start_buffer
82 /* Reuse the defun-start even if POS is a little farther on.
83 POS might be in the next defun, but that's ok.
84 Our value may not be the best possible, but will still be usable. */
85 && pos
<= find_start_pos
+ 1000
86 && pos
>= find_start_value
87 && BEGV
== find_start_begv
88 && MODIFF
== find_start_modiff
)
89 return find_start_value
;
91 /* Back up to start of line. */
92 tem
= scan_buffer ('\n', pos
, BEGV
, -1, &shortage
, 1);
96 /* Open-paren at start of line means we found our defun-start. */
97 if (SYNTAX (FETCH_CHAR (tem
)) == Sopen
)
99 /* Move to beg of previous line. */
100 tem
= scan_buffer ('\n', tem
, BEGV
, -2, &shortage
, 1);
103 /* Record what we found, for the next try. */
104 find_start_value
= tem
;
105 find_start_buffer
= current_buffer
;
106 find_start_modiff
= MODIFF
;
107 find_start_begv
= BEGV
;
108 find_start_pos
= pos
;
110 return find_start_value
;
113 DEFUN ("syntax-table-p", Fsyntax_table_p
, Ssyntax_table_p
, 1, 1, 0,
114 "Return t if OBJECT is a syntax table.\n\
115 Currently, any char-table counts as a syntax table.")
119 if (CHAR_TABLE_P (object
)
120 && XCHAR_TABLE (object
)->purpose
== Qsyntax_table
)
126 check_syntax_table (obj
)
129 if (!(CHAR_TABLE_P (obj
)
130 && XCHAR_TABLE (obj
)->purpose
== Qsyntax_table
))
131 wrong_type_argument (Qsyntax_table_p
, obj
);
134 DEFUN ("syntax-table", Fsyntax_table
, Ssyntax_table
, 0, 0, 0,
135 "Return the current syntax table.\n\
136 This is the one specified by the current buffer.")
139 return current_buffer
->syntax_table
;
142 DEFUN ("standard-syntax-table", Fstandard_syntax_table
,
143 Sstandard_syntax_table
, 0, 0, 0,
144 "Return the standard syntax table.\n\
145 This is the one used for new buffers.")
148 return Vstandard_syntax_table
;
151 DEFUN ("copy-syntax-table", Fcopy_syntax_table
, Scopy_syntax_table
, 0, 1, 0,
152 "Construct a new syntax table and return it.\n\
153 It is a copy of the TABLE, which defaults to the standard syntax table.")
160 check_syntax_table (table
);
162 table
= Vstandard_syntax_table
;
164 copy
= Fcopy_sequence (table
);
165 Fset_char_table_parent (copy
, Vstandard_syntax_table
);
169 DEFUN ("set-syntax-table", Fset_syntax_table
, Sset_syntax_table
, 1, 1, 0,
170 "Select a new syntax table for the current buffer.\n\
171 One argument, a syntax table.")
175 check_syntax_table (table
);
176 current_buffer
->syntax_table
= table
;
177 /* Indicate that this buffer now has a specified syntax table. */
178 current_buffer
->local_var_flags
179 |= XFASTINT (buffer_local_flags
.syntax_table
);
183 /* Convert a letter which signifies a syntax code
184 into the code it signifies.
185 This is used by modify-syntax-entry, and other things. */
187 unsigned char syntax_spec_code
[0400] =
188 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
189 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
190 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
191 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
192 (char) Swhitespace
, 0377, (char) Sstring
, 0377,
193 (char) Smath
, 0377, 0377, (char) Squote
,
194 (char) Sopen
, (char) Sclose
, 0377, 0377,
195 0377, (char) Swhitespace
, (char) Spunct
, (char) Scharquote
,
196 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
197 0377, 0377, 0377, 0377,
198 (char) Scomment
, 0377, (char) Sendcomment
, 0377,
199 (char) Sinherit
, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
200 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
201 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
202 0377, 0377, 0377, 0377, (char) Sescape
, 0377, 0377, (char) Ssymbol
,
203 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
204 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
205 0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword
,
206 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377
209 /* Indexed by syntax code, give the letter that describes it. */
211 char syntax_code_spec
[14] =
213 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@'
216 /* Look up the value for CHARACTER in syntax table TABLE's parent
217 and its parents. SYNTAX_ENTRY calls this, when TABLE itself has nil
218 for CHARACTER. It's actually used only when not compiled with GCC. */
221 syntax_parent_lookup (table
, character
)
229 table
= XCHAR_TABLE (table
)->parent
;
233 value
= XCHAR_TABLE (table
)->contents
[character
];
239 DEFUN ("char-syntax", Fchar_syntax
, Schar_syntax
, 1, 1, 0,
240 "Return the syntax code of CHARACTER, described by a character.\n\
241 For example, if CHARACTER is a word constituent,\n\
242 the character `w' is returned.\n\
243 The characters that correspond to various syntax codes\n\
244 are listed in the documentation of `modify-syntax-entry'.")
246 Lisp_Object character
;
249 CHECK_NUMBER (character
, 0);
250 char_int
= XINT (character
);
251 return make_number (syntax_code_spec
[(int) SYNTAX (char_int
)]);
254 DEFUN ("matching-paren", Fmatching_paren
, Smatching_paren
, 1, 1, 0,
255 "Return the matching parenthesis of CHARACTER, or nil if none.")
257 Lisp_Object character
;
260 CHECK_NUMBER (character
, 0);
261 char_int
= XINT (character
);
262 code
= SYNTAX (char_int
);
263 if (code
== Sopen
|| code
== Sclose
)
264 return make_number (SYNTAX_MATCH (char_int
));
268 /* This comment supplies the doc string for modify-syntax-entry,
269 for make-docfile to see. We cannot put this in the real DEFUN
270 due to limits in the Unix cpp.
272 DEFUN ("modify-syntax-entry", foo, bar, 2, 3, 0,
273 "Set syntax for character CHAR according to string S.\n\
274 The syntax is changed only for table TABLE, which defaults to\n\
275 the current buffer's syntax table.\n\
276 The first character of S should be one of the following:\n\
277 Space or - whitespace syntax. w word constituent.\n\
278 _ symbol constituent. . punctuation.\n\
279 ( open-parenthesis. ) close-parenthesis.\n\
280 \" string quote. \\ escape.\n\
281 $ paired delimiter. ' expression quote or prefix operator.\n\
282 < comment starter. > comment ender.\n\
283 / character-quote. @ inherit from `standard-syntax-table'.\n\
285 Only single-character comment start and end sequences are represented thus.\n\
286 Two-character sequences are represented as described below.\n\
287 The second character of S is the matching parenthesis,\n\
288 used only if the first character is `(' or `)'.\n\
289 Any additional characters are flags.\n\
290 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\
291 1 means CHAR is the start of a two-char comment start sequence.\n\
292 2 means CHAR is the second character of such a sequence.\n\
293 3 means CHAR is the start of a two-char comment end sequence.\n\
294 4 means CHAR is the second character of such a sequence.\n\
296 There can be up to two orthogonal comment sequences. This is to support\n\
297 language modes such as C++. By default, all comment sequences are of style\n\
298 a, but you can set the comment sequence style to b (on the second character\n\
299 of a comment-start, or the first character of a comment-end sequence) using\n\
301 b means CHAR is part of comment sequence b.\n\
303 p means CHAR is a prefix character for `backward-prefix-chars';\n\
304 such characters are treated as whitespace when they occur\n\
305 between expressions.")
309 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry
, Smodify_syntax_entry
, 2, 3,
310 /* I really don't know why this is interactive
311 help-form should at least be made useful whilst reading the second arg
313 "cSet syntax for character: \nsSet syntax for %s to: ",
314 0 /* See immediately above */)
315 (c
, newentry
, syntax_table
)
316 Lisp_Object c
, newentry
, syntax_table
;
318 register unsigned char *p
;
319 register enum syntaxcode code
;
324 CHECK_STRING (newentry
, 1);
326 if (NILP (syntax_table
))
327 syntax_table
= current_buffer
->syntax_table
;
329 check_syntax_table (syntax_table
);
331 p
= XSTRING (newentry
)->data
;
332 code
= (enum syntaxcode
) syntax_spec_code
[*p
++];
333 if (((int) code
& 0377) == 0377)
334 error ("invalid syntax description letter: %c", c
);
336 if (code
== Sinherit
)
338 SET_RAW_SYNTAX_ENTRY (syntax_table
, c
, Qnil
);
344 XSETINT (match
, *p
++);
345 if (XFASTINT (match
) == ' ')
380 SET_RAW_SYNTAX_ENTRY (syntax_table
, c
,
381 Fcons (make_number (val
), match
));
386 /* Dump syntax table to buffer in human-readable format */
389 describe_syntax (value
)
392 register enum syntaxcode code
;
393 char desc
, match
, start1
, start2
, end1
, end2
, prefix
, comstyle
;
395 Lisp_Object first
, match_lisp
;
397 Findent_to (make_number (16), make_number (1));
401 insert_string ("inherit");
407 insert_string ("invalid");
411 first
= XCONS (value
)->car
;
412 match_lisp
= XCONS (value
)->cdr
;
414 if (!INTEGERP (first
) || !(NILP (match_lisp
) || INTEGERP (match_lisp
)))
416 insert_string ("invalid");
420 code
= (enum syntaxcode
) (first
& 0377);
421 start1
= (XINT (first
) >> 16) & 1;
422 start2
= (XINT (first
) >> 17) & 1;
423 end1
= (XINT (first
) >> 18) & 1;
424 end2
= (XINT (first
) >> 19) & 1;
425 prefix
= (XINT (first
) >> 20) & 1;
426 comstyle
= (XINT (first
) >> 21) & 1;
428 if ((int) code
< 0 || (int) code
>= (int) Smax
)
430 insert_string ("invalid");
433 desc
= syntax_code_spec
[(int) code
];
435 str
[0] = desc
, str
[1] = 0;
438 str
[0] = !NILP (match_lisp
) ? XINT (match_lisp
) : ' ';
456 insert_string ("\twhich means: ");
458 switch (SWITCH_ENUM_CAST (code
))
461 insert_string ("whitespace"); break;
463 insert_string ("punctuation"); break;
465 insert_string ("word"); break;
467 insert_string ("symbol"); break;
469 insert_string ("open"); break;
471 insert_string ("close"); break;
473 insert_string ("quote"); break;
475 insert_string ("string"); break;
477 insert_string ("math"); break;
479 insert_string ("escape"); break;
481 insert_string ("charquote"); break;
483 insert_string ("comment"); break;
485 insert_string ("endcomment"); break;
487 insert_string ("invalid");
491 if (!NILP (match_lisp
))
493 insert_string (", matches ");
494 insert_char (XINT (match_lisp
));
498 insert_string (",\n\t is the first character of a comment-start sequence");
500 insert_string (",\n\t is the second character of a comment-start sequence");
503 insert_string (",\n\t is the first character of a comment-end sequence");
505 insert_string (",\n\t is the second character of a comment-end sequence");
507 insert_string (" (comment style b)");
510 insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
512 insert_string ("\n");
516 describe_syntax_1 (vector
)
519 struct buffer
*old
= current_buffer
;
520 set_buffer_internal (XBUFFER (Vstandard_output
));
521 describe_vector (vector
, Qnil
, describe_syntax
, 0, Qnil
, Qnil
);
522 call0 (intern ("help-mode"));
523 set_buffer_internal (old
);
527 DEFUN ("describe-syntax", Fdescribe_syntax
, Sdescribe_syntax
, 0, 0, "",
528 "Describe the syntax specifications in the syntax table.\n\
529 The descriptions are inserted in a buffer, which is then displayed.")
532 internal_with_output_to_temp_buffer
533 ("*Help*", describe_syntax_1
, current_buffer
->syntax_table
);
538 /* Return the position across COUNT words from FROM.
539 If that many words cannot be found before the end of the buffer, return 0.
540 COUNT negative means scan backward and stop at word beginning. */
542 scan_words (from
, count
)
543 register int from
, count
;
545 register int beg
= BEGV
;
546 register int end
= ZV
;
562 charcode
= FETCH_CHAR (from
);
563 code
= SYNTAX (charcode
);
564 if (words_include_escapes
565 && (code
== Sescape
|| code
== Scharquote
))
573 if (from
== end
) break;
574 charcode
= FETCH_CHAR (from
);
575 code
= SYNTAX (charcode
);
576 if (!(words_include_escapes
577 && (code
== Sescape
|| code
== Scharquote
)))
593 charcode
= FETCH_CHAR (from
- 1);
594 code
= SYNTAX (charcode
);
595 if (words_include_escapes
596 && (code
== Sescape
|| code
== Scharquote
))
604 if (from
== beg
) break;
605 charcode
= FETCH_CHAR (from
- 1);
606 code
= SYNTAX (charcode
);
607 if (!(words_include_escapes
608 && (code
== Sescape
|| code
== Scharquote
)))
621 DEFUN ("forward-word", Fforward_word
, Sforward_word
, 1, 1, "p",
622 "Move point forward ARG words (backward if ARG is negative).\n\
623 Normally returns t.\n\
624 If an edge of the buffer is reached, point is left there\n\
625 and nil is returned.")
630 CHECK_NUMBER (count
, 0);
632 if (!(val
= scan_words (point
, XINT (count
))))
634 SET_PT (XINT (count
) > 0 ? ZV
: BEGV
);
641 DEFUN ("forward-comment", Fforward_comment
, Sforward_comment
, 1, 1, 0,
642 "Move forward across up to N comments. If N is negative, move backward.\n\
643 Stop scanning if we find something other than a comment or whitespace.\n\
644 Set point to where scanning stops.\n\
645 If N comments are found as expected, with nothing except whitespace\n\
646 between them, return t; otherwise return nil.")
653 register enum syntaxcode code
;
654 int comstyle
= 0; /* style of comment encountered */
658 CHECK_NUMBER (count
, 0);
659 count1
= XINT (count
);
677 c
= FETCH_CHAR (from
);
681 if (from
< stop
&& SYNTAX_COMSTART_FIRST (c
)
682 && (c1
= FETCH_CHAR (from
),
683 SYNTAX_COMSTART_SECOND (c1
)))
685 /* We have encountered a comment start sequence and we
686 are ignoring all text inside comments. We must record
687 the comment style this sequence begins so that later,
688 only a comment end of the same style actually ends
689 the comment section. */
691 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
695 while (code
== Swhitespace
|| code
== Sendcomment
);
696 if (code
!= Scomment
)
702 /* We're at the start of a comment. */
711 c
= FETCH_CHAR (from
);
713 if (SYNTAX (c
) == Sendcomment
714 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
715 /* we have encountered a comment end of the same style
716 as the comment sequence which began this comment
719 if (from
< stop
&& SYNTAX_COMEND_FIRST (c
)
720 && (c1
= FETCH_CHAR (from
),
721 SYNTAX_COMEND_SECOND (c1
))
722 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
723 /* we have encountered a comment end of the same style
724 as the comment sequence which began this comment
728 /* We have skipped one comment. */
740 quoted
= char_quoted (from
);
743 c
= FETCH_CHAR (from
);
746 if (code
== Sendcomment
)
747 comstyle
= SYNTAX_COMMENT_STYLE (c
);
748 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
749 && (c1
= FETCH_CHAR (from
- 1),
750 SYNTAX_COMEND_FIRST (c1
))
751 && !char_quoted (from
- 1))
753 /* We must record the comment style encountered so that
754 later, we can match only the proper comment begin
755 sequence of the same style. */
757 comstyle
= SYNTAX_COMMENT_STYLE (c1
);
761 if (code
== Sendcomment
&& !quoted
)
764 if (code
!= SYNTAX (c
))
765 /* For a two-char comment ender, we can assume
766 it does end a comment. So scan back in a simple way. */
768 if (from
!= stop
) from
--;
771 if ((c
= FETCH_CHAR (from
),
772 SYNTAX (c
) == Scomment
)
773 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
782 if (SYNTAX_COMSTART_SECOND (c
)
783 && (c1
= FETCH_CHAR (from
),
784 SYNTAX_COMSTART_FIRST (c1
))
785 && SYNTAX_COMMENT_STYLE (c
) == comstyle
786 && !char_quoted (from
))
793 /* Look back, counting the parity of string-quotes,
794 and recording the comment-starters seen.
795 When we reach a safe place, assume that's not in a string;
796 then step the main scan to the earliest comment-starter seen
797 an even number of string quotes away from the safe place.
799 OFROM[I] is position of the earliest comment-starter seen
800 which is I+2X quotes from the comment-end.
801 PARITY is current parity of quotes from the comment end. */
804 char my_stringend
= 0;
805 int string_lossage
= 0;
806 int comment_end
= from
;
807 int comstart_pos
= 0;
808 int comstart_parity
= 0;
809 int scanstart
= from
- 1;
811 /* At beginning of range to scan, we're outside of strings;
812 that determines quote parity to the comment-end. */
815 /* Move back and examine a character. */
818 c
= FETCH_CHAR (from
);
821 /* If this char is the second of a 2-char comment sequence,
822 back up and give the pair the appropriate syntax. */
823 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
824 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1)))
828 c
= FETCH_CHAR (from
);
831 /* If this char starts a 2-char comment start sequence,
832 treat it like a 1-char comment starter. */
833 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
)
834 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
+ 1))
835 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
+ 1)))
838 /* Ignore escaped characters. */
839 if (char_quoted (from
))
842 /* Track parity of quotes. */
846 if (my_stringend
== 0)
848 /* If we have two kinds of string delimiters.
849 There's no way to grok this scanning backwards. */
850 else if (my_stringend
!= c
)
854 /* Record comment-starters according to that
855 quote-parity to the comment-end. */
856 if (code
== Scomment
)
858 comstart_parity
= parity
;
862 /* If we find another earlier comment-ender,
863 any comment-starts earlier than that don't count
864 (because they go with the earlier comment-ender). */
865 if (code
== Sendcomment
866 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
)) == comstyle
)
869 /* Assume a defun-start point is outside of strings. */
871 && (from
== stop
|| FETCH_CHAR (from
- 1) == '\n'))
875 if (comstart_pos
== 0)
877 /* If the earliest comment starter
878 is followed by uniform paired string quotes or none,
879 we know it can't be inside a string
880 since if it were then the comment ender would be inside one.
881 So it does start a comment. Skip back to it. */
882 else if (comstart_parity
== 0 && !string_lossage
)
886 /* We had two kinds of string delimiters mixed up
887 together. Decode this going forwards.
888 Scan fwd from the previous comment ender
889 to the one in question; this records where we
890 last passed a comment starter. */
891 struct lisp_parse_state state
;
892 scan_sexps_forward (&state
, find_defun_start (comment_end
),
893 comment_end
- 1, -10000, 0, Qnil
, 0);
895 from
= state
.comstart
;
897 /* We can't grok this as a comment; scan it normally. */
901 /* We have skipped one comment. */
904 else if ((code
!= Swhitespace
&& code
!= Scomment
) || quoted
)
920 int parse_sexp_ignore_comments
;
923 scan_lists (from
, count
, depth
, sexpflag
)
925 int count
, depth
, sexpflag
;
930 unsigned char stringterm
;
933 register enum syntaxcode code
;
934 int min_depth
= depth
; /* Err out if depth gets less than this. */
935 int comstyle
= 0; /* style of comment encountered */
937 if (depth
> 0) min_depth
= 0;
947 c
= FETCH_CHAR (from
);
950 if (from
< stop
&& SYNTAX_COMSTART_FIRST (c
)
951 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
))
952 && parse_sexp_ignore_comments
)
954 /* we have encountered a comment start sequence and we
955 are ignoring all text inside comments. we must record
956 the comment style this sequence begins so that later,
957 only a comment end of the same style actually ends
958 the comment section */
960 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
964 if (SYNTAX_PREFIX (c
))
967 switch (SWITCH_ENUM_CAST (code
))
971 if (from
== stop
) goto lose
;
973 /* treat following character as a word constituent */
976 if (depth
|| !sexpflag
) break;
977 /* This word counts as a sexp; return at end of it. */
980 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
985 if (from
== stop
) goto lose
;
999 if (!parse_sexp_ignore_comments
) break;
1008 c
= FETCH_CHAR (from
);
1009 if (SYNTAX (c
) == Sendcomment
1010 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1011 /* we have encountered a comment end of the same style
1012 as the comment sequence which began this comment
1016 if (from
< stop
&& SYNTAX_COMEND_FIRST (c
)
1017 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
1018 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1019 /* we have encountered a comment end of the same style
1020 as the comment sequence which began this comment
1029 if (from
!= stop
&& c
== FETCH_CHAR (from
))
1039 if (!++depth
) goto done
;
1044 if (!--depth
) goto done
;
1045 if (depth
< min_depth
)
1046 error ("Containing expression ends prematurely");
1050 stringterm
= FETCH_CHAR (from
- 1);
1053 if (from
>= stop
) goto lose
;
1054 if (FETCH_CHAR (from
) == stringterm
) break;
1055 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1064 if (!depth
&& sexpflag
) goto done
;
1069 /* Reached end of buffer. Error if within object, return nil if between */
1070 if (depth
) goto lose
;
1075 /* End of object reached */
1086 if (quoted
= char_quoted (from
))
1088 c
= FETCH_CHAR (from
);
1091 if (code
== Sendcomment
)
1092 comstyle
= SYNTAX_COMMENT_STYLE (c
);
1093 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1094 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1))
1095 && !char_quoted (from
- 1)
1096 && parse_sexp_ignore_comments
)
1098 /* we must record the comment style encountered so that
1099 later, we can match only the proper comment begin
1100 sequence of the same style */
1102 comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
- 1));
1106 if (SYNTAX_PREFIX (c
))
1109 switch (SWITCH_ENUM_CAST (quoted
? Sword
: code
))
1113 if (depth
|| !sexpflag
) break;
1114 /* This word counts as a sexp; count object finished after passing it. */
1117 quoted
= char_quoted (from
- 1);
1120 if (! (quoted
|| SYNTAX (FETCH_CHAR (from
- 1)) == Sword
1121 || SYNTAX (FETCH_CHAR (from
- 1)) == Ssymbol
1122 || SYNTAX (FETCH_CHAR (from
- 1)) == Squote
))
1131 if (from
!= stop
&& c
== FETCH_CHAR (from
- 1))
1141 if (!++depth
) goto done2
;
1146 if (!--depth
) goto done2
;
1147 if (depth
< min_depth
)
1148 error ("Containing expression ends prematurely");
1152 if (!parse_sexp_ignore_comments
)
1155 if (code
!= SYNTAX (c
))
1156 /* For a two-char comment ender, we can assume
1157 it does end a comment. So scan back in a simple way. */
1159 if (from
!= stop
) from
--;
1162 if (SYNTAX (c
= FETCH_CHAR (from
)) == Scomment
1163 && SYNTAX_COMMENT_STYLE (c
) == comstyle
)
1172 if (SYNTAX_COMSTART_SECOND (c
)
1173 && SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
))
1174 && SYNTAX_COMMENT_STYLE (c
) == comstyle
1175 && !char_quoted (from
))
1182 /* Look back, counting the parity of string-quotes,
1183 and recording the comment-starters seen.
1184 When we reach a safe place, assume that's not in a string;
1185 then step the main scan to the earliest comment-starter seen
1186 an even number of string quotes away from the safe place.
1188 OFROM[I] is position of the earliest comment-starter seen
1189 which is I+2X quotes from the comment-end.
1190 PARITY is current parity of quotes from the comment end. */
1193 char my_stringend
= 0;
1194 int string_lossage
= 0;
1195 int comment_end
= from
;
1196 int comstart_pos
= 0;
1197 int comstart_parity
= 0;
1198 int scanstart
= from
- 1;
1200 /* At beginning of range to scan, we're outside of strings;
1201 that determines quote parity to the comment-end. */
1202 while (from
!= stop
)
1204 /* Move back and examine a character. */
1207 c
= FETCH_CHAR (from
);
1210 /* If this char is the second of a 2-char comment sequence,
1211 back up and give the pair the appropriate syntax. */
1212 if (from
> stop
&& SYNTAX_COMEND_SECOND (c
)
1213 && SYNTAX_COMEND_FIRST (FETCH_CHAR (from
- 1)))
1217 c
= FETCH_CHAR (from
);
1220 /* If this char starts a 2-char comment start sequence,
1221 treat it like a 1-char comment starter. */
1222 if (from
< scanstart
&& SYNTAX_COMSTART_FIRST (c
)
1223 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
+ 1))
1224 && comstyle
== SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
+ 1)))
1227 /* Ignore escaped characters. */
1228 if (char_quoted (from
))
1231 /* Track parity of quotes. */
1232 if (code
== Sstring
)
1235 if (my_stringend
== 0)
1237 /* If we have two kinds of string delimiters.
1238 There's no way to grok this scanning backwards. */
1239 else if (my_stringend
!= c
)
1243 /* Record comment-starters according to that
1244 quote-parity to the comment-end. */
1245 if (code
== Scomment
)
1247 comstart_parity
= parity
;
1248 comstart_pos
= from
;
1251 /* If we find another earlier comment-ender,
1252 any comment-starts earlier than that don't count
1253 (because they go with the earlier comment-ender). */
1254 if (code
== Sendcomment
1255 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
)) == comstyle
)
1258 /* Assume a defun-start point is outside of strings. */
1260 && (from
== stop
|| FETCH_CHAR (from
- 1) == '\n'))
1264 if (comstart_pos
== 0)
1266 /* If the earliest comment starter
1267 is followed by uniform paired string quotes or none,
1268 we know it can't be inside a string
1269 since if it were then the comment ender would be inside one.
1270 So it does start a comment. Skip back to it. */
1271 else if (comstart_parity
== 0 && !string_lossage
)
1272 from
= comstart_pos
;
1275 /* We had two kinds of string delimiters mixed up
1276 together. Decode this going forwards.
1277 Scan fwd from the previous comment ender
1278 to the one in question; this records where we
1279 last passed a comment starter. */
1280 struct lisp_parse_state state
;
1281 scan_sexps_forward (&state
, find_defun_start (comment_end
),
1282 comment_end
- 1, -10000, 0, Qnil
, 0);
1283 if (state
.incomment
)
1284 from
= state
.comstart
;
1286 /* We can't grok this as a comment; scan it normally. */
1293 stringterm
= FETCH_CHAR (from
);
1296 if (from
== stop
) goto lose
;
1297 if (!char_quoted (from
- 1)
1298 && stringterm
== FETCH_CHAR (from
- 1))
1303 if (!depth
&& sexpflag
) goto done2
;
1308 /* Reached start of buffer. Error if within object, return nil if between */
1309 if (depth
) goto lose
;
1320 XSETFASTINT (val
, from
);
1324 error ("Unbalanced parentheses");
1332 register enum syntaxcode code
;
1333 register int beg
= BEGV
;
1334 register int quoted
= 0;
1337 && ((code
= SYNTAX (FETCH_CHAR (pos
- 1))) == Scharquote
1338 || code
== Sescape
))
1339 pos
--, quoted
= !quoted
;
1343 DEFUN ("scan-lists", Fscan_lists
, Sscan_lists
, 3, 3, 0,
1344 "Scan from character number FROM by COUNT lists.\n\
1345 Returns the character number of the position thus found.\n\
1347 If DEPTH is nonzero, paren depth begins counting from that value,\n\
1348 only places where the depth in parentheses becomes zero\n\
1349 are candidates for stopping; COUNT such places are counted.\n\
1350 Thus, a positive value for DEPTH means go out levels.\n\
1352 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1354 If the beginning or end of (the accessible part of) the buffer is reached\n\
1355 and the depth is wrong, an error is signaled.\n\
1356 If the depth is right but the count is not used up, nil is returned.")
1357 (from
, count
, depth
)
1358 Lisp_Object from
, count
, depth
;
1360 CHECK_NUMBER (from
, 0);
1361 CHECK_NUMBER (count
, 1);
1362 CHECK_NUMBER (depth
, 2);
1364 return scan_lists (XINT (from
), XINT (count
), XINT (depth
), 0);
1367 DEFUN ("scan-sexps", Fscan_sexps
, Sscan_sexps
, 2, 2, 0,
1368 "Scan from character number FROM by COUNT balanced expressions.\n\
1369 If COUNT is negative, scan backwards.\n\
1370 Returns the character number of the position thus found.\n\
1372 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.\n\
1374 If the beginning or end of (the accessible part of) the buffer is reached\n\
1375 in the middle of a parenthetical grouping, an error is signaled.\n\
1376 If the beginning or end is reached between groupings\n\
1377 but before count is used up, nil is returned.")
1379 Lisp_Object from
, count
;
1381 CHECK_NUMBER (from
, 0);
1382 CHECK_NUMBER (count
, 1);
1384 return scan_lists (XINT (from
), XINT (count
), 0, 1);
1387 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars
, Sbackward_prefix_chars
,
1389 "Move point backward over any number of chars with prefix syntax.\n\
1390 This includes chars with \"quote\" or \"prefix\" syntax (' or p).")
1396 while (pos
> beg
&& !char_quoted (pos
- 1)
1397 && (SYNTAX (FETCH_CHAR (pos
- 1)) == Squote
1398 || SYNTAX_PREFIX (FETCH_CHAR (pos
- 1))))
1406 /* Parse forward from FROM to END,
1407 assuming that FROM has state OLDSTATE (nil means FROM is start of function),
1408 and return a description of the state of the parse at END.
1409 If STOPBEFORE is nonzero, stop at the start of an atom.
1410 If COMMENTSTOP is nonzero, stop at the start of a comment. */
1413 scan_sexps_forward (stateptr
, from
, end
, targetdepth
,
1414 stopbefore
, oldstate
, commentstop
)
1415 struct lisp_parse_state
*stateptr
;
1417 int end
, targetdepth
, stopbefore
;
1418 Lisp_Object oldstate
;
1421 struct lisp_parse_state state
;
1423 register enum syntaxcode code
;
1424 struct level
{ int last
, prev
; };
1425 struct level levelstart
[100];
1426 register struct level
*curlevel
= levelstart
;
1427 struct level
*endlevel
= levelstart
+ 100;
1429 register int depth
; /* Paren depth of current scanning location.
1430 level - levelstart equals this except
1431 when the depth becomes negative. */
1432 int mindepth
; /* Lowest DEPTH value seen. */
1433 int start_quoted
= 0; /* Nonzero means starting after a char quote */
1439 if (NILP (oldstate
))
1442 state
.instring
= -1;
1443 state
.incomment
= 0;
1444 state
.comstyle
= 0; /* comment style a by default */
1448 tem
= Fcar (oldstate
);
1454 oldstate
= Fcdr (oldstate
);
1455 oldstate
= Fcdr (oldstate
);
1456 oldstate
= Fcdr (oldstate
);
1457 tem
= Fcar (oldstate
);
1458 state
.instring
= !NILP (tem
) ? XINT (tem
) : -1;
1460 oldstate
= Fcdr (oldstate
);
1461 tem
= Fcar (oldstate
);
1462 state
.incomment
= !NILP (tem
);
1464 oldstate
= Fcdr (oldstate
);
1465 tem
= Fcar (oldstate
);
1466 start_quoted
= !NILP (tem
);
1468 /* if the eight element of the list is nil, we are in comment
1469 style a. if it is non-nil, we are in comment style b */
1470 oldstate
= Fcdr (oldstate
);
1471 oldstate
= Fcdr (oldstate
);
1472 tem
= Fcar (oldstate
);
1473 state
.comstyle
= !NILP (tem
);
1478 curlevel
->prev
= -1;
1479 curlevel
->last
= -1;
1481 /* Enter the loop at a place appropriate for initial state. */
1483 if (state
.incomment
) goto startincomment
;
1484 if (state
.instring
>= 0)
1486 if (start_quoted
) goto startquotedinstring
;
1489 if (start_quoted
) goto startquoted
;
1493 code
= SYNTAX (FETCH_CHAR (from
));
1495 if (code
== Scomment
)
1496 state
.comstart
= from
-1;
1498 else if (from
< end
&& SYNTAX_COMSTART_FIRST (FETCH_CHAR (from
- 1))
1499 && SYNTAX_COMSTART_SECOND (FETCH_CHAR (from
)))
1501 /* Record the comment style we have entered so that only
1502 the comment-end sequence of the same style actually
1503 terminates the comment section. */
1505 state
.comstyle
= SYNTAX_COMMENT_STYLE (FETCH_CHAR (from
));
1506 state
.comstart
= from
-1;
1510 if (SYNTAX_PREFIX (FETCH_CHAR (from
- 1)))
1512 switch (SWITCH_ENUM_CAST (code
))
1516 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1517 curlevel
->last
= from
- 1;
1519 if (from
== end
) goto endquoted
;
1522 /* treat following character as a word constituent */
1525 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1526 curlevel
->last
= from
- 1;
1530 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1535 if (from
== end
) goto endquoted
;
1547 curlevel
->prev
= curlevel
->last
;
1555 /* Enter the loop in the middle so that we find
1556 a 2-char comment ender if we start in the middle of it. */
1557 prev
= FETCH_CHAR (from
- 1);
1558 goto startincomment_1
;
1560 /* At beginning of buffer, enter the loop the ordinary way. */
1563 state
.incomment
= 1;
1568 if (from
== end
) goto done
;
1569 prev
= FETCH_CHAR (from
);
1570 if (SYNTAX (prev
) == Sendcomment
1571 && SYNTAX_COMMENT_STYLE (prev
) == state
.comstyle
)
1572 /* Only terminate the comment section if the endcomment
1573 of the same style as the start sequence has been
1578 if (from
< end
&& SYNTAX_COMEND_FIRST (prev
)
1579 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from
))
1580 && SYNTAX_COMMENT_STYLE (prev
) == state
.comstyle
)
1581 /* Only terminate the comment section if the end-comment
1582 sequence of the same style as the start sequence has
1583 been encountered. */
1586 state
.incomment
= 0;
1587 state
.comstyle
= 0; /* reset the comment style */
1591 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1593 /* curlevel++->last ran into compiler bug on Apollo */
1594 curlevel
->last
= from
- 1;
1595 if (++curlevel
== endlevel
)
1596 error ("Nesting too deep for parser");
1597 curlevel
->prev
= -1;
1598 curlevel
->last
= -1;
1599 if (targetdepth
== depth
) goto done
;
1604 if (depth
< mindepth
)
1606 if (curlevel
!= levelstart
)
1608 curlevel
->prev
= curlevel
->last
;
1609 if (targetdepth
== depth
) goto done
;
1613 if (stopbefore
) goto stop
; /* this arg means stop at sexp start */
1614 curlevel
->last
= from
- 1;
1615 state
.instring
= FETCH_CHAR (from
- 1);
1619 if (from
>= end
) goto done
;
1620 if (FETCH_CHAR (from
) == state
.instring
) break;
1621 switch (SWITCH_ENUM_CAST (SYNTAX (FETCH_CHAR (from
))))
1626 startquotedinstring
:
1627 if (from
>= end
) goto endquoted
;
1631 state
.instring
= -1;
1632 curlevel
->prev
= curlevel
->last
;
1642 stop
: /* Here if stopping before start of sexp. */
1643 from
--; /* We have just fetched the char that starts it; */
1644 goto done
; /* but return the position before it. */
1649 state
.depth
= depth
;
1650 state
.mindepth
= mindepth
;
1651 state
.thislevelstart
= curlevel
->prev
;
1652 state
.prevlevelstart
1653 = (curlevel
== levelstart
) ? -1 : (curlevel
- 1)->last
;
1654 state
.location
= from
;
1660 /* This comment supplies the doc string for parse-partial-sexp,
1661 for make-docfile to see. We cannot put this in the real DEFUN
1662 due to limits in the Unix cpp.
1664 DEFUN ("parse-partial-sexp", Ffoo, Sfoo, 2, 6, 0,
1665 "Parse Lisp syntax starting at FROM until TO; return status of parse at TO.\n\
1666 Parsing stops at TO or when certain criteria are met;\n\
1667 point is set to where parsing stops.\n\
1668 If fifth arg STATE is omitted or nil,\n\
1669 parsing assumes that FROM is the beginning of a function.\n\
1670 Value is a list of eight elements describing final state of parsing:\n\
1671 0. depth in parens.\n\
1672 1. character address of start of innermost containing list; nil if none.\n\
1673 2. character address of start of last complete sexp terminated.\n\
1674 3. non-nil if inside a string.\n\
1675 (it is the character that will terminate the string.)\n\
1676 4. t if inside a comment.\n\
1677 5. t if following a quote character.\n\
1678 6. the minimum paren-depth encountered during this scan.\n\
1679 7. t if in a comment of style `b'.\n\
1680 If third arg TARGETDEPTH is non-nil, parsing stops if the depth\n\
1681 in parentheses becomes equal to TARGETDEPTH.\n\
1682 Fourth arg STOPBEFORE non-nil means stop when come to\n\
1683 any character that starts a sexp.\n\
1684 Fifth arg STATE is an eight-list like what this function returns.\n\
1685 It is used to initialize the state of the parse. Its second and third
1686 elements are ignored.
1687 Sixth args COMMENTSTOP non-nil means stop at the start of a comment.")
1688 (from, to, targetdepth, stopbefore, state, commentstop)
1691 DEFUN ("parse-partial-sexp", Fparse_partial_sexp
, Sparse_partial_sexp
, 2, 6, 0,
1692 0 /* See immediately above */)
1693 (from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
)
1694 Lisp_Object from
, to
, targetdepth
, stopbefore
, oldstate
, commentstop
;
1696 struct lisp_parse_state state
;
1699 if (!NILP (targetdepth
))
1701 CHECK_NUMBER (targetdepth
, 3);
1702 target
= XINT (targetdepth
);
1705 target
= -100000; /* We won't reach this depth */
1707 validate_region (&from
, &to
);
1708 scan_sexps_forward (&state
, XINT (from
), XINT (to
),
1709 target
, !NILP (stopbefore
), oldstate
,
1710 !NILP (commentstop
));
1712 SET_PT (state
.location
);
1714 return Fcons (make_number (state
.depth
),
1715 Fcons (state
.prevlevelstart
< 0 ? Qnil
: make_number (state
.prevlevelstart
),
1716 Fcons (state
.thislevelstart
< 0 ? Qnil
: make_number (state
.thislevelstart
),
1717 Fcons (state
.instring
>= 0 ? make_number (state
.instring
) : Qnil
,
1718 Fcons (state
.incomment
? Qt
: Qnil
,
1719 Fcons (state
.quoted
? Qt
: Qnil
,
1720 Fcons (make_number (state
.mindepth
),
1721 Fcons (state
.comstyle
? Qt
: Qnil
,
1730 /* This has to be done here, before we call Fmake_char_table. */
1731 Qsyntax_table
= intern ("syntax-table");
1732 staticpro (&Qsyntax_table
);
1734 /* Intern this now in case it isn't already done.
1735 Setting this variable twice is harmless.
1736 But don't staticpro it here--that is done in alloc.c. */
1737 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1739 /* Now we are ready to set up this property, so we can
1740 create syntax tables. */
1741 Fput (Qsyntax_table
, Qchar_table_extra_slots
, make_number (0));
1743 temp
= Fcons (make_number ((int) Swhitespace
), Qnil
);
1745 Vstandard_syntax_table
= Fmake_char_table (Qsyntax_table
, temp
);
1747 temp
= Fcons (make_number ((int) Sword
), Qnil
);
1748 for (i
= 'a'; i
<= 'z'; i
++)
1749 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
1750 for (i
= 'A'; i
<= 'Z'; i
++)
1751 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
1752 for (i
= '0'; i
<= '9'; i
++)
1753 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, i
, temp
);
1755 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '$', temp
);
1756 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '%', temp
);
1758 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '(',
1759 Fcons (make_number (Sopen
), make_number (')')));
1760 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ')',
1761 Fcons (make_number (Sclose
), make_number ('(')));
1762 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '[',
1763 Fcons (make_number (Sopen
), make_number (']')));
1764 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ']',
1765 Fcons (make_number (Sclose
), make_number ('[')));
1766 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '{',
1767 Fcons (make_number (Sopen
), make_number ('}')));
1768 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '}',
1769 Fcons (make_number (Sclose
), make_number ('{')));
1770 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '"',
1771 Fcons (make_number ((int) Sstring
), Qnil
));
1772 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, '\\',
1773 Fcons (make_number ((int) Sescape
), Qnil
));
1775 temp
= Fcons (make_number ((int) Ssymbol
), Qnil
);
1776 for (i
= 0; i
< 10; i
++)
1777 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, "_-+*/&|<>="[i
], temp
);
1779 temp
= Fcons (make_number ((int) Spunct
), Qnil
);
1780 for (i
= 0; i
< 12; i
++)
1781 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table
, ".,;:?!#@~^'`"[i
], temp
);
1786 Qsyntax_table_p
= intern ("syntax-table-p");
1787 staticpro (&Qsyntax_table_p
);
1789 DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments
,
1790 "Non-nil means `forward-sexp', etc., should treat comments as whitespace.");
1792 words_include_escapes
= 0;
1793 DEFVAR_BOOL ("words-include-escapes", &words_include_escapes
,
1794 "Non-nil means `forward-word', etc., should treat escape chars part of words.");
1796 defsubr (&Ssyntax_table_p
);
1797 defsubr (&Ssyntax_table
);
1798 defsubr (&Sstandard_syntax_table
);
1799 defsubr (&Scopy_syntax_table
);
1800 defsubr (&Sset_syntax_table
);
1801 defsubr (&Schar_syntax
);
1802 defsubr (&Smatching_paren
);
1803 defsubr (&Smodify_syntax_entry
);
1804 defsubr (&Sdescribe_syntax
);
1806 defsubr (&Sforward_word
);
1808 defsubr (&Sforward_comment
);
1809 defsubr (&Sscan_lists
);
1810 defsubr (&Sscan_sexps
);
1811 defsubr (&Sbackward_prefix_chars
);
1812 defsubr (&Sparse_partial_sexp
);