/* GNU Emacs routines to deal with syntax tables; also word and list parsing.
- Copyright (C) 1985, 87, 93, 94, 95, 97, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1985, 87, 93, 94, 95, 97, 1998, 1999 Free Software Foundation, Inc.
This file is part of GNU Emacs.
#include "commands.h"
#include "buffer.h"
#include "charset.h"
-#include <assert.h>
/* Make syntax table lookup grant data in gl_state. */
#define SYNTAX_ENTRY_VIA_PROPERTY
int words_include_escapes;
int parse_sexp_lookup_properties;
+/* Nonzero means `scan-sexps' treats all multibyte characters as symbols. */
+int multibyte_syntax_as_symbol;
+
/* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
if not compiled with GCC. No need to mark it, since it is used
only very temporarily. */
Lisp_Object syntax_temp;
+/* Non-zero means an open parenthesis in column 0 is always considered
+ to be the start of a defun. Zero means an open parenthesis in
+ column 0 has no special meaning. */
+
+int open_paren_in_column_0_is_defun_start;
+
/* This is the internal form of the parse state used in parse-partial-sexp. */
struct lisp_parse_state
Lisp_Object object;
{
Lisp_Object tmp_table;
- int cnt = 0, doing_extra = 0, invalidate = 1;
+ int cnt = 0, invalidate = 1;
INTERVAL i, oldi;
if (init)
while (!NULL_PARENT (i))
{
if (AM_RIGHT_CHILD (i))
- i->parent->position = i->position
+ INTERVAL_PARENT (i)->position = i->position
- LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
- - TOTAL_LENGTH (i->parent)
- + LEFT_TOTAL_LENGTH (i->parent);
+ - TOTAL_LENGTH (INTERVAL_PARENT (i))
+ + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
else
- i->parent->position = i->position - LEFT_TOTAL_LENGTH (i)
+ INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
+ TOTAL_LENGTH (i);
- i = i->parent;
+ i = INTERVAL_PARENT (i);
}
i = gl_state.forward_i;
gl_state.b_property = i->position - 1 - gl_state.offset;
find_defun_start (pos, pos_byte)
int pos, pos_byte;
{
- int tem;
- int shortage;
int opoint = PT, opoint_byte = PT_BYTE;
/* Use previous finding, if it's valid and applies to this inquiry. */
syntax-tables. */
gl_state.current_syntax_table = current_buffer->syntax_table;
gl_state.use_global = 0;
- while (PT > BEGV)
+ if (open_paren_in_column_0_is_defun_start)
{
- /* Open-paren at start of line means we found our defun-start. */
- if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
+ while (PT > BEGV)
{
- SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
+ /* Open-paren at start of line means we may have found our
+ defun-start. */
if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
- break;
- /* Now fallback to the default value. */
- gl_state.current_syntax_table = current_buffer->syntax_table;
- gl_state.use_global = 0;
+ {
+ SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
+ if (SYNTAX (FETCH_CHAR (PT_BYTE)) == Sopen)
+ break;
+ /* Now fallback to the default value. */
+ gl_state.current_syntax_table = current_buffer->syntax_table;
+ gl_state.use_global = 0;
+ }
+ /* Move to beg of previous line. */
+ scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
}
- /* Move to beg of previous line. */
- scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
}
/* Record what we found, for the next try. */
OFROM[I] is position of the earliest comment-starter seen
which is I+2X quotes from the comment-end.
PARITY is current parity of quotes from the comment end. */
- int parity = 0;
- int my_stringend = 0;
+ int string_style = -1; /* Presumed outside of any string. */
int string_lossage = 0;
+ /* Not a real lossage: indicates that we have passed a matching comment
+ starter plus a non-matching comment-ender, meaning that any matching
+ comment-starter we might see later could be a false positive (hidden
+ inside another comment).
+ Test case: { a (* b } c (* d *) */
+ int comment_lossage = 0;
int comment_end = from;
int comment_end_byte = from_byte;
int comstart_pos = 0;
int comstart_byte;
- /* Value that PARITY had, when we reached the position
- in COMSTART_POS. */
- int comstart_parity = 0;
int scanstart = from - 1;
/* Place where the containing defun starts,
or 0 if we didn't come across it yet. */
that determines quote parity to the comment-end. */
while (from != stop)
{
- int temp_byte, prev_comend_second;
+ int temp_byte;
/* Move back and examine a character. */
DEC_BOTH (from, from_byte);
code = Scomment;
UPDATE_SYNTAX_TABLE_BACKWARD (from);
}
+ else if (code == Scomment && comstyle != SYNTAX_COMMENT_STYLE (c))
+ /* Ignore comment starters of a different style. */
+ continue;
/* Ignore escaped characters, except comment-enders. */
if (code != Sendcomment && char_quoted (from, from_byte))
continue;
- /* Track parity of quotes. */
- if (code == Sstring)
+ switch (code)
{
- parity ^= 1;
- if (my_stringend == 0)
- my_stringend = c;
- /* If we have two kinds of string delimiters.
- There's no way to grok this scanning backwards. */
- else if (my_stringend != c)
- string_lossage = 1;
- }
-
- if (code == Sstring_fence || code == Scomment_fence)
- {
- parity ^= 1;
- if (my_stringend == 0)
- my_stringend
- = code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE;
- /* If we have two kinds of string delimiters.
- There's no way to grok this scanning backwards. */
- else if (my_stringend != (code == Sstring_fence
- ? ST_STRING_STYLE : ST_COMMENT_STYLE))
+ case Sstring_fence:
+ case Scomment_fence:
+ c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
+ case Sstring:
+ /* Track parity of quotes. */
+ if (string_style == -1)
+ /* Entering a string. */
+ string_style = c;
+ else if (string_style == c)
+ /* Leaving the string. */
+ string_style = -1;
+ else
+ /* If we have two kinds of string delimiters.
+ There's no way to grok this scanning backwards. */
string_lossage = 1;
- }
+ break;
+
+ case Scomment:
+ /* We've already checked that it is the relevant comstyle. */
+ if (string_style != -1 || comment_lossage || string_lossage)
+ /* There are odd string quotes involved, so let's be careful.
+ Test case in Pascal: " { " a { " } */
+ goto lossage;
- if (code == Scomment)
- /* FIXME: we should also check that the comstyle is correct
- if the Scomment is a single-char. */
- {
- if (comnested && --nesting <= 0 && parity == 0 && !string_lossage)
+ if (!comnested)
+ {
+ /* Record best comment-starter so far. */
+ comstart_pos = from;
+ comstart_byte = from_byte;
+ }
+ else if (--nesting <= 0)
/* nested comments have to be balanced, so we don't need to
keep looking for earlier ones. We use here the same (slightly
incorrect) reasoning as below: since it is followed by uniform
paired string quotes, this comment-start has to be outside of
strings, else the comment-end itself would be inside a string. */
goto done;
+ break;
- /* Record comment-starters according to that
- quote-parity to the comment-end. */
- comstart_parity = parity;
- comstart_pos = from;
- comstart_byte = from_byte;
- }
+ case Sendcomment:
+ if (SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
+ /* This is the same style of comment ender as ours. */
+ {
+ if (comnested)
+ nesting++;
+ else
+ /* Anything before that can't count because it would match
+ this comment-ender rather than ours. */
+ from = stop; /* Break out of the loop. */
+ }
+ else if (comstart_pos != 0 || c != '\n')
+ /* We're mixing comment styles here, so we'd better be careful.
+ The (comstart_pos != 0 || c != '\n') check is not quite correct
+ (we should just always set comment_lossage), but removing it
+ would imply that any multiline comment in C would go through
+ lossage, which seems overkill.
+ The failure should only happen in the rare cases such as
+ { (* } *) */
+ comment_lossage = 1;
+ break;
- /* If we find another earlier comment-ender,
- any comment-starts earlier than that don't count
- (because they go with the earlier comment-ender). */
- if (code == Sendcomment
- && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
- if (comnested)
- nesting++;
- else
+ case Sopen:
+ /* Assume a defun-start point is outside of strings. */
+ if (open_paren_in_column_0_is_defun_start
+ && (from == stop
+ || (temp_byte = dec_bytepos (from_byte),
+ FETCH_CHAR (temp_byte) == '\n')))
+ {
+ defun_start = from;
+ defun_start_byte = from_byte;
+ from = stop; /* Break out of the loop. */
+ }
break;
- /* Assume a defun-start point is outside of strings. */
- if (code == Sopen
- && (from == stop
- || (temp_byte = dec_bytepos (from_byte),
- FETCH_CHAR (temp_byte) == '\n')))
- {
- defun_start = from;
- defun_start_byte = from_byte;
+ default:
break;
}
}
from_byte = comment_end_byte;
UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
}
- /* If the earliest comment starter
- is followed by uniform paired string quotes or none,
- we know it can't be inside a string
- since if it were then the comment ender would be inside one.
- So it does start a comment. Skip back to it. */
- else if (!comnested && comstart_parity == 0 && !string_lossage)
+ /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
+ or `done'), then we've found the beginning of the non-nested comment. */
+ else if (1) /* !comnested */
{
from = comstart_pos;
from_byte = comstart_byte;
}
else
{
+ struct lisp_parse_state state;
+ lossage:
/* We had two kinds of string delimiters mixed up
together. Decode this going forwards.
- Scan fwd from the previous comment ender
+ Scan fwd from a known safe place (beginning-of-defun)
to the one in question; this records where we
last passed a comment starter. */
- struct lisp_parse_state state;
/* If we did not already find the defun start, find it now. */
if (defun_start == 0)
{
defun_start = find_defun_start (comment_end, comment_end_byte);
defun_start_byte = find_start_value_byte;
}
- scan_sexps_forward (&state,
- defun_start, defun_start_byte,
- comment_end - 1, -10000, 0, Qnil, 0);
- if (state.incomment)
- {
- /* scan_sexps_forward changed the direction of search in
- global variables, so we need to update it completely. */
-
- from = state.comstr_start;
- }
- else
+ do
{
- from = comment_end;
- }
+ scan_sexps_forward (&state,
+ defun_start, defun_start_byte,
+ comment_end, -10000, 0, Qnil, 0);
+ defun_start = comment_end;
+ if (state.incomment == (comnested ? 1 : -1)
+ && state.comstyle == comstyle)
+ from = state.comstr_start;
+ else
+ {
+ from = comment_end;
+ if (state.incomment)
+ /* If comment_end is inside some other comment, maybe ours
+ is nested, so we need to try again from within the
+ surrounding comment. Example: { a (* " *) */
+ {
+ /* FIXME: We should advance by one or two chars. */
+ defun_start = state.comstr_start + 2;
+ defun_start_byte = CHAR_TO_BYTE (defun_start);
+ }
+ }
+ } while (defun_start < comment_end);
+
from_byte = CHAR_TO_BYTE (from);
UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
}
*charpos_ptr = from;
*bytepos_ptr = from_byte;
- return from;
+ return (from == comment_end) ? -1 : from;
}
\f
DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
(table)
Lisp_Object table;
{
+ int idx;
check_syntax_table (table);
current_buffer->syntax_table = table;
/* Indicate that this buffer now has a specified syntax table. */
- current_buffer->local_var_flags
- |= XFASTINT (buffer_local_flags.syntax_table);
+ idx = PER_BUFFER_VAR_IDX (syntax_table);
+ SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
return table;
}
\f
Lisp_Object value;
{
register enum syntaxcode code;
- char desc, match, start1, start2, end1, end2, prefix, comstyle;
+ char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
char str[2];
Lisp_Object first, match_lisp;
return;
}
- first = XCONS (value)->car;
- match_lisp = XCONS (value)->cdr;
+ first = XCAR (value);
+ match_lisp = XCDR (value);
if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
{
end2 = (XINT (first) >> 19) & 1;
prefix = (XINT (first) >> 20) & 1;
comstyle = (XINT (first) >> 21) & 1;
+ comnested = (XINT (first) >> 22) & 1;
if ((int) code < 0 || (int) code >= (int) Smax)
{
insert ("p", 1);
if (comstyle)
insert ("b", 1);
+ if (comnested)
+ insert ("n", 1);
insert_string ("\twhich means: ");
insert_string (",\n\t is the second character of a comment-end sequence");
if (comstyle)
insert_string (" (comment style b)");
+ if (comnested)
+ insert_string (" (nestable)");
if (prefix)
insert_string (",\n\t is a prefix character for `backward-prefix-chars'");
DEFUN ("forward-word", Fforward_word, Sforward_word, 1, 1, "p",
"Move point forward ARG words (backward if ARG is negative).\n\
Normally returns t.\n\
-If an edge of the buffer is reached, point is left there\n\
-and nil is returned.")
+If an edge of the buffer or a field boundary is reached, point is left there\n\
+and the function returns nil. Field boundaries are not noticed if\n\
+`inhibit-field-text-motion' is non-nil.")
(count)
Lisp_Object count;
{
- int val, prompt_end;
+ int orig_val, val;
CHECK_NUMBER (count, 0);
- if (!(val = scan_words (PT, XINT (count))))
- {
- SET_PT (XINT (count) > 0 ? ZV : BEGV);
- return Qnil;
- }
+ val = orig_val = scan_words (PT, XINT (count));
+ if (! orig_val)
+ val = XINT (count) > 0 ? ZV : BEGV;
- /* If in a mini-buffer and moving backwards, stop at the end of the
- prompt. This prevents accidentially moving into the read-only
- prompt. */
- if (INTEGERP (current_buffer->prompt_end_charpos)
- && (prompt_end = XINT (current_buffer->prompt_end_charpos),
- ((PT > prompt_end && val < prompt_end)
- || (PT < prompt_end && val > prompt_end))))
- val = prompt_end;
-
+ /* Avoid jumping out of an input field. */
+ val = XFASTINT (Fconstrain_to_field (make_number (val), make_number (PT),
+ Qt, Qnil, Qnil));
+
SET_PT (val);
- return Qt;
+ return val == orig_val ? Qt : Qnil;
}
\f
Lisp_Object skip_chars ();
int forwardp, syntaxp;
Lisp_Object string, lim;
{
- register unsigned char *p, *pend;
register unsigned int c;
register int ch;
unsigned char fastmap[0400];
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
int string_multibyte;
int size_byte;
+ unsigned char *str;
+ int len;
CHECK_STRING (string, 0);
char_ranges = (int *) alloca (XSTRING (string)->size * (sizeof (int)) * 2);
string_multibyte = STRING_MULTIBYTE (string);
+ str = XSTRING (string)->data;
size_byte = STRING_BYTES (XSTRING (string));
+ /* Adjust the multibyteness of the string to that of the buffer. */
+ if (multibyte != string_multibyte)
+ {
+ int nbytes;
+
+ if (multibyte)
+ nbytes = count_size_as_multibyte (XSTRING (string)->data,
+ XSTRING (string)->size);
+ else
+ nbytes = XSTRING (string)->size;
+ if (nbytes != size_byte)
+ {
+ str = (unsigned char *) alloca (nbytes);
+ copy_text (XSTRING (string)->data, str, size_byte,
+ string_multibyte, multibyte);
+ size_byte = nbytes;
+ }
+ }
+
if (NILP (lim))
XSETINT (lim, forwardp ? ZV : BEGV);
else
bzero (fastmap, sizeof fastmap);
- i = 0, i_byte = 0;
+ i_byte = 0;
if (i_byte < size_byte
&& XSTRING (string)->data[0] == '^')
{
- negate = 1; i++, i_byte++;
+ negate = 1; i_byte++;
}
/* Find the characters specified and set their elements of fastmap.
while (i_byte < size_byte)
{
- int c_leading_code;
-
- if (string_multibyte)
- {
- c_leading_code = XSTRING (string)->data[i_byte];
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
- }
- else
- c = c_leading_code = XSTRING (string)->data[i_byte++];
+ int c_leading_code = str[i_byte];
- /* Convert multibyteness between what the string has
- and what the buffer has. */
- if (multibyte)
- c = unibyte_char_to_multibyte (c);
- else
- c &= 0377;
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, size_byte - i_byte, len);
+ i_byte += len;
if (syntaxp)
fastmap[syntax_spec_code[c & 0377]] = 1;
if (i_byte == size_byte)
break;
- if (string_multibyte)
- {
- c_leading_code = XSTRING (string)->data[i_byte];
- FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
- }
- else
- c = c_leading_code = XSTRING (string)->data[i_byte++];
+ c_leading_code = str[i_byte];
+ c = STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
+ i_byte += len;
}
if (i_byte < size_byte
- && XSTRING (string)->data[i_byte] == '-')
+ && str[i_byte] == '-')
{
unsigned int c2, c2_leading_code;
/* Skip over the dash. */
- i++, i_byte++;
+ i_byte++;
if (i_byte == size_byte)
break;
/* Get the end of the range. */
- if (string_multibyte)
- {
- c2_leading_code = XSTRING (string)->data[i_byte];
- FETCH_STRING_CHAR_ADVANCE (c2, string, i, i_byte);
- }
- else
- c2 = XSTRING (string)->data[i_byte++];
+ c2_leading_code = str[i_byte];
+ c2 =STRING_CHAR_AND_LENGTH (str+i_byte, size_byte-i_byte, len);
+ i_byte += len;
if (SINGLE_BYTE_CHAR_P (c))
{
if (! SINGLE_BYTE_CHAR_P (c2))
- error ("Invalid charcter range: %s",
- XSTRING (string)->data);
+ {
+ /* Handle a range such as \177-\377 in multibyte
+ mode. Split that into two ranges, the low
+ one ending at 0237, and the high one starting
+ at the smallest character in the charset of
+ C2 and ending at C2. */
+ int charset = CHAR_CHARSET (c2);
+ int c1 = MAKE_CHAR (charset, 0, 0);
+
+ fastmap[c2_leading_code] = 1;
+ char_ranges[n_char_ranges++] = c1;
+ char_ranges[n_char_ranges++] = c2;
+ c2 = 0237;
+ }
while (c <= c2)
{
fastmap[c] = 1;
c++;
}
}
- else
+ else if (! SINGLE_BYTE_CHAR_P (c2))
{
if (c_leading_code != c2_leading_code)
- error ("Invalid charcter range: %s",
+ error ("Invalid character range: %s",
XSTRING (string)->data);
- fastmap[c_leading_code] = 1;
if (c <= c2)
{
+ fastmap[c_leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c2;
}
}
else
{
- fastmap[c_leading_code] = 1;
- if (!SINGLE_BYTE_CHAR_P (c))
+ if (SINGLE_BYTE_CHAR_P (c))
+ fastmap[c] = 1;
+ else
{
+ fastmap[c_leading_code] = 1;
char_ranges[n_char_ranges++] = c;
char_ranges[n_char_ranges++] = c;
}
if (multibyte)
while (pos < XINT (lim) && fastmap[(c = FETCH_BYTE (pos_byte))])
{
- if (!BASE_LEADING_CODE_P (c))
- INC_BOTH (pos, pos_byte);
- else if (n_char_ranges)
+ /* If we are looking at a multibyte character, we
+ must look up the character in the table
+ CHAR_RANGES. If there's no data in the table,
+ that character is not what we want to skip. */
+ if (BASE_LEADING_CODE_P (c)
+ && (c = FETCH_MULTIBYTE_CHAR (pos_byte),
+ ! SINGLE_BYTE_CHAR_P (c)))
{
- /* We much check CHAR_RANGES for a multibyte
- character. */
- ch = FETCH_MULTIBYTE_CHAR (pos_byte);
+ /* The following code does the right thing even if
+ n_char_ranges is zero (i.e. no data in
+ CHAR_RANGES). */
for (i = 0; i < n_char_ranges; i += 2)
- if ((ch >= char_ranges[i] && ch <= char_ranges[i + 1]))
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
break;
if (!(negate ^ (i < n_char_ranges)))
break;
-
- INC_BOTH (pos, pos_byte);
- }
- else
- {
- if (!negate) break;
- INC_BOTH (pos, pos_byte);
}
+ INC_BOTH (pos, pos_byte);
}
else
while (pos < XINT (lim) && fastmap[FETCH_BYTE (pos)])
if (multibyte)
while (pos > XINT (lim))
{
- int savepos = pos_byte;
- DEC_BOTH (pos, pos_byte);
- if (fastmap[(c = FETCH_BYTE (pos_byte))])
- {
- if (!BASE_LEADING_CODE_P (c))
- ;
- else if (n_char_ranges)
- {
- /* We much check CHAR_RANGES for a multibyte
- character. */
- ch = FETCH_MULTIBYTE_CHAR (pos_byte);
- for (i = 0; i < n_char_ranges; i += 2)
- if (ch >= char_ranges[i] && ch <= char_ranges[i + 1])
- break;
- if (!(negate ^ (i < n_char_ranges)))
- {
- pos++;
- pos_byte = savepos;
- break;
- }
- }
- else
- if (!negate)
- {
- pos++;
- pos_byte = savepos;
- break;
- }
- }
- else
+ int prev_pos_byte = pos_byte;
+
+ DEC_POS (prev_pos_byte);
+ if (!fastmap[(c = FETCH_BYTE (prev_pos_byte))])
+ break;
+
+ /* See the comment in the previous similar code. */
+ if (BASE_LEADING_CODE_P (c)
+ && (c = FETCH_MULTIBYTE_CHAR (prev_pos_byte),
+ ! SINGLE_BYTE_CHAR_P (c)))
{
- pos++;
- pos_byte = savepos;
- break;
+ for (i = 0; i < n_char_ranges; i += 2)
+ if (c >= char_ranges[i] && c <= char_ranges[i + 1])
+ break;
+ if (!(negate ^ (i < n_char_ranges)))
+ break;
}
+ pos--;
+ pos_byte = prev_pos_byte;
}
else
while (pos > XINT (lim) && fastmap[FETCH_BYTE (pos - 1)])
&& SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
&& (c1 = FETCH_CHAR (from_byte),
SYNTAX_COMEND_SECOND (c1)))
- if (--nesting <= 0)
- /* we have encountered a comment end of the same style
- as the comment sequence which began this comment
- section */
- break;
- else
- {
- INC_BOTH (from, from_byte);
- UPDATE_SYNTAX_TABLE_FORWARD (from);
- }
+ {
+ if (--nesting <= 0)
+ /* we have encountered a comment end of the same style
+ as the comment sequence which began this comment
+ section */
+ break;
+ else
+ {
+ INC_BOTH (from, from_byte);
+ UPDATE_SYNTAX_TABLE_FORWARD (from);
+ }
+ }
if (nesting > 0
&& from < stop
&& SYNTAX_FLAGS_COMSTART_FIRST (syntax)
code = SYNTAX (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
+ comstyle = SYNTAX_COMMENT_STYLE (c);
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
- comstyle = 0;
if (from < stop && comstart_first
&& (c1 = FETCH_CHAR (from_byte),
SYNTAX_COMSTART_SECOND (c1)))
INC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_FORWARD (from);
}
+ /* FIXME: here we ignore 2-char endcomments while we don't
+ when going backwards. */
}
while (code == Swhitespace || code == Sendcomment);
{
found = back_comment (from, from_byte, stop, comnested, comstyle,
&out_charpos, &out_bytepos);
- if (found != -1)
+ if (found == -1)
+ {
+#if 0 /* cc-mode (and maybe others) relies on the bogus behavior. */
+ /* Failure: we should go back to the end of this
+ not-quite-endcomment. */
+ if (SYNTAX(c) != code)
+ /* It was a two-char Sendcomment. */
+ INC_BOTH (from, from_byte);
+ goto leave;
+#endif
+ }
+ else
+ /* We have skipped one comment. */
from = out_charpos, from_byte = out_bytepos;
- /* We have skipped one comment. */
break;
}
else if (code != Swhitespace && code != Scomment)
return Qt;
}
\f
+/* Return the syntax code of character C if C is a single-byte character
+ or `multibyte_symbol_p' is zero. Otherwise, return Ssymbol.
+
+ The macro reads `multibyte_symbol_p' directly instead of taking it as
+ an argument, so a variable of that name must be in scope wherever the
+ macro is expanded. C appears twice in the expansion, so pass an
+ expression without side effects. */
+
+#define SYNTAX_WITH_MULTIBYTE_CHECK(c) \
+ ((SINGLE_BYTE_CHAR_P (c) || !multibyte_symbol_p) \
+ ? SYNTAX (c) : Ssymbol)
+
static Lisp_Object
scan_lists (from, count, depth, sexpflag)
register int from;
int from_byte;
int out_bytepos, out_charpos;
int temp, dummy;
+ int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
if (depth > 0) min_depth = 0;
int comstart_first, prefix;
UPDATE_SYNTAX_TABLE_FORWARD (from);
c = FETCH_CHAR (from_byte);
- code = SYNTAX (c);
+ code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
comstart_first = SYNTAX_COMSTART_FIRST (c);
comnested = SYNTAX_COMMENT_NESTED (c);
+ comstyle = SYNTAX_COMMENT_STYLE (c);
prefix = SYNTAX_PREFIX (c);
if (depth == min_depth)
last_good = from;
UPDATE_SYNTAX_TABLE_FORWARD (from);
/* Some compilers can't handle this inside the switch. */
- temp = SYNTAX (FETCH_CHAR (from_byte));
+ c = FETCH_CHAR (from_byte);
+ temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
switch (temp)
{
case Scharquote:
{
if (from >= stop) goto lose;
UPDATE_SYNTAX_TABLE_FORWARD (from);
+ c = FETCH_CHAR (from_byte);
if (code == Sstring
- ? (FETCH_CHAR (from_byte) == stringterm)
- : SYNTAX (FETCH_CHAR (from_byte)) == Sstring_fence)
+ ? c == stringterm
+ : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
break;
/* Some compilers can't handle this inside the switch. */
- temp = SYNTAX (FETCH_CHAR (from_byte));
+ temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
switch (temp)
{
case Scharquote:
DEC_BOTH (from, from_byte);
UPDATE_SYNTAX_TABLE_BACKWARD (from);
c = FETCH_CHAR (from_byte);
- code = SYNTAX (c);
+ code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
if (depth == min_depth)
last_good = from;
comstyle = 0;
}
/* Quoting turns anything except a comment-ender
- into a word character. Note that this if cannot be true
+ into a word character. Note that this cannot be true
if we decremented FROM in the if-statement above. */
if (code != Sendcomment && char_quoted (from, from_byte))
code = Sword;
temp_pos--;
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
c1 = FETCH_CHAR (temp_pos);
- temp_code = SYNTAX (c1);
+ temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
/* Don't allow comment-end to be quoted. */
if (temp_code == Sendcomment)
goto done2;
UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
}
c1 = FETCH_CHAR (temp_pos);
- temp_code = SYNTAX (c1);
+ temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
if (! (quoted || temp_code == Sword
|| temp_code == Ssymbol
|| temp_code == Squote))
break;
found = back_comment (from, from_byte, stop, comnested, comstyle,
&out_charpos, &out_bytepos);
+ /* FIXME: if found == -1, then it really wasn't a comment-end.
+ For single-char Sendcomment, we can't do much about it apart
+ from skipping the char.
+ For 2-char endcomments, we could try again, taking both
+ chars as separate entities, but it's a lot of trouble
+ for very little gain, so we don't bother either. -sm */
if (found != -1)
from = out_charpos, from_byte = out_bytepos;
break;
if (from == stop) goto lose;
UPDATE_SYNTAX_TABLE_BACKWARD (from);
if (!char_quoted (from, from_byte)
- && SYNTAX (FETCH_CHAR (from_byte)) == code)
+ && (c = FETCH_CHAR (from_byte),
+ SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
break;
}
if (code == Sstring_fence && !depth && sexpflag) goto done2;
oldstate = Fcdr (oldstate);
tem = Fcar (oldstate);
/* Check whether we are inside string_fence-style string: */
- state.instring = ( !NILP (tem)
- ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
- : -1);
+ state.instring = (!NILP (tem)
+ ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
+ : -1);
oldstate = Fcdr (oldstate);
tem = Fcar (oldstate);
- state.incomment = ( !NILP (tem)
- ? ( INTEGERP (tem) ? XINT (tem) : -1)
+ state.incomment = (!NILP (tem)
+ ? (INTEGERP (tem) ? XINT (tem) : -1)
: 0);
oldstate = Fcdr (oldstate);
oldstate = Fcdr (oldstate);
oldstate = Fcdr (oldstate);
tem = Fcar (oldstate);
- state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
- ? ST_COMMENT_STYLE : 1 );
+ state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
+ ? ST_COMMENT_STYLE : 1);
oldstate = Fcdr (oldstate);
tem = Fcar (oldstate);
/* curlevel++->last ran into compiler bug on Apollo */
curlevel->last = XINT (Fcar (tem));
if (++curlevel == endlevel)
- error ("Nesting too deep for parser");
+ curlevel--; /* error ("Nesting too deep for parser"); */
curlevel->prev = -1;
curlevel->last = -1;
tem = Fcdr (tem);
if (code == Scomment)
{
+ state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
1 : -1);
state.comstr_start = prev_from;
curlevel->prev = curlevel->last;
break;
- startincomment:
- if (commentstop == 1)
- goto done;
- goto commentloop;
-
case Scomment:
- assert (state.incomment != 0); /* state.incomment = -1; */
if (commentstop || boundary_stop) goto done;
- commentloop:
- /* The (from == BEGV) test is to enter the loop in the middle so
+ startincomment:
+ /* The (from == BEGV) test was to enter the loop in the middle so
that we find a 2-char comment ender even if we start in the
- middle of it. */
+ middle of it. We don't want to do that if we're just at the
+ beginning of the comment (think of (*) ... (*)). */
found = forw_comment (from, from_byte, end,
state.incomment, state.comstyle,
- (from == BEGV) ? 0 : prev_from_syntax,
+ (from == BEGV || from < state.comstr_start + 3)
+ ? 0 : prev_from_syntax,
&out_charpos, &out_bytepos, &state.incomment);
from = out_charpos; from_byte = out_bytepos;
/* Beware! prev_from and friends are invalid now.
Luckily, the `done' doesn't use them and the INC_FROM
sets them to a sane value without looking at them. */
if (!found) goto done;
- INC_FROM;
+ INC_FROM;
state.incomment = 0;
state.comstyle = 0; /* reset the comment style */
if (boundary_stop) goto done;
/* curlevel++->last ran into compiler bug on Apollo */
curlevel->last = prev_from;
if (++curlevel == endlevel)
- error ("Nesting too deep for parser");
+ curlevel--; /* error ("Nesting too deep for parser"); */
curlevel->prev = -1;
curlevel->last = -1;
if (targetdepth == depth) goto done;
else an integer (the current comment nesting).\n\
5. t if following a quote character.\n\
6. the minimum paren-depth encountered during this scan.\n\
- 7. t if in a comment of style b; `syntax-table' if the comment\n\
+ 7. t if in a comment of style b; symbol `syntax-table' if the comment\n\
should be terminated by a generic comment delimiter.\n\
8. character address of start of comment or string; nil if not in one.\n\
9. Intermediate data for continuation of parsing (subject to change).\n\
It is used to initialize the state of the parse. Elements number 1, 2, 6\n\
and 8 are ignored; you can leave off element 8 (the last) entirely.\n\
Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.\n\
- If it is `syntax-table', stop after the start of a comment or a string,\n\
- or after end of a comment or a string.")
+ If it is symbol `syntax-table', stop after the start of a comment or a\n\
+ string, or after end of a comment or a string.")
(from, to, targetdepth, stopbefore, state, commentstop)
*/
c = ".,;:?!#@~^'`"[i];
SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
}
+
+ /* All multibyte characters have syntax `word' by default. */
+ temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
+ for (i = CHAR_TABLE_SINGLE_BYTE_SLOTS; i < CHAR_TABLE_ORDINARY_SLOTS; i++)
+ XCHAR_TABLE (Vstandard_syntax_table)->contents[i] = temp;
}
void
DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
"Non-nil means `forward-word', etc., should treat escape chars part of words.");
+ DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
+ "Non-nil means `scan-sexps' treats all multibyte characters as symbol.");
+ multibyte_syntax_as_symbol = 0;
+
+ DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
+ &open_paren_in_column_0_is_defun_start,
+ "Non-nil means an open paren in column 0 denotes the start of a defun.");
+ open_paren_in_column_0_is_defun_start = 1;
+
defsubr (&Ssyntax_table_p);
defsubr (&Ssyntax_table);
defsubr (&Sstandard_syntax_table);