src/syntax.c

   1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
   2    Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2011
   3                  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20
  21 #include <config.h>
  22 #include <ctype.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "commands.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "keymap.h"
  29 #include "regex.h"
  30
  31 /* Make syntax table lookup grant data in gl_state.  */
  32 #define SYNTAX_ENTRY_VIA_PROPERTY
  33
  34 #include "syntax.h"
  35 #include "intervals.h"
  36 #include "category.h"
  37
   38 /* Then there are eight single-bit flags that have the following meanings:
  39   1. This character is the first of a two-character comment-start sequence.
  40   2. This character is the second of a two-character comment-start sequence.
  41   3. This character is the first of a two-character comment-end sequence.
  42   4. This character is the second of a two-character comment-end sequence.
  43   5. This character is a prefix, for backward-prefix-chars.
  44   6. The char is part of a delimiter for comments of style "b".
  45   7. This character is part of a nestable comment sequence.
  46   8. The char is part of a delimiter for comments of style "c".
  47   Note that any two-character sequence whose first character has flag 1
  48   and whose second character has flag 2 will be interpreted as a comment start.
  49
  50   bit 6 and 8 are used to discriminate between different comment styles.
  51   Languages such as C++ allow two orthogonal syntax start/end pairs
  52   and bit 6 is used to determine whether a comment-end or Scommentend
  53   ends style a or b.  Comment markers can start style a, b, c, or bc.
  54   Style a is always the default.
   55   For 2-char comment markers, the style b flag is only looked up on the second
   56   char of the comment starter and on the first char of the comment ender.
   57   For style c (as for the nested flag), the flag can be placed on any
   58   one of the chars.
  59   */
  60
   61 /* These macros extract specific flags from an integer
   62    that holds the syntax code and the flags.  */
   63 /* Bit 16: first char of a two-char comment-start sequence.  */
   64 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
   65 /* Bit 17: second char of a two-char comment-start sequence.  */
   66 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
   67 /* Bit 18: first char of a two-char comment-end sequence.  */
   68 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
   69 /* Bit 19: second char of a two-char comment-end sequence.  */
   70 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
   71 /* Bit 20: prefix char, for backward-prefix-chars.  */
   72 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
   73 /* Bit 21 gives style b (0 or 1); bit 23 gives style c (0 or 2).  */
   74 #define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
   75 #define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
   76 /* Yields style a=0, b=1, c=2 or bc=3.  FLAGS are those of the marker's main
   77    char, e.g. the second for comstart and the first for comend.  */
   78 #define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
   79   (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
   80    | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
   81    | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
   82 /* Bit 22: the char is part of a nestable comment sequence.  */
   83 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
   84
   85 /* These macros extract a particular flag for a given character.  */
   86
   87 #define SYNTAX_COMEND_FIRST(c) \
   88   (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
   89 #define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
  90
   91 /* We use these constants in place of a comment style or a
   92    string-ender char to distinguish comments/strings started by
   93    comment_fence and string_fence codes.  */
   94
   95 #define ST_COMMENT_STYLE (256 + 1)
   96 #define ST_STRING_STYLE (256 + 2)
   97 /* Qsyntax_table serves as char-table purpose and as text-property key.  */
   98 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
   99
  100 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
  101    if not compiled with GCC.  No need to mark it, since it is used
  102    only very temporarily.  */
  103 Lisp_Object syntax_temp;
 104
  105 /* This is the internal form of the parse state used in parse-partial-sexp.  */
  106 /* back_comment also has one filled in through scan_sexps_forward.  */
  107 struct lisp_parse_state
  108   {
  109     int depth;     /* Depth at end of parsing.  */
  110     int instring;  /* -1 if not within string, else desired terminator.  */
  111     int incomment; /* 0 outside comments, -1 in unnestable one, else nesting.  */
  112     int comstyle;  /* Style (cf. SYNTAX_FLAGS_COMMENT_STYLE) or ST_COMMENT_STYLE.  */
  113     int quoted;    /* Nonzero if just after an escape char at end of parsing.  */
  114     int mindepth;  /* Minimum depth seen while scanning.  */
  115     /* Char number of most recent start-of-expression at current level.  */
  116     EMACS_INT thislevelstart;
  117     /* Char number of start of containing expression.  */
  118     EMACS_INT prevlevelstart;
  119     EMACS_INT location;      /* Char number at which parsing stopped.  */
  120     EMACS_INT comstr_start;  /* Position of last comment/string starter.  */
  121     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
  122                                 of levels (starting from outermost).  */
  123   };
 124 \f
 125 /* These variables are a cache for finding the start of a defun.
 126    find_start_pos is the place for which the defun start was found.
 127    find_start_value is the defun start position found for it.
 128    find_start_value_byte is the corresponding byte position.
 129    find_start_buffer is the buffer it was found in.
 130    find_start_begv is the BEGV value when it was found.
 131    find_start_modiff is the value of MODIFF when it was found.  */
 132
 133 static EMACS_INT find_start_pos;
 134 static EMACS_INT find_start_value;
 135 static EMACS_INT find_start_value_byte;
 136 static struct buffer *find_start_buffer;
 137 static EMACS_INT find_start_begv;
 138 static int find_start_modiff;
 139
 140
 141 static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
 142 static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
 143 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
 144 static void scan_sexps_forward (struct lisp_parse_state *,
 145                                 EMACS_INT, EMACS_INT, EMACS_INT, int,
 146                                 int, Lisp_Object, int);
 147 static int in_classes (int, Lisp_Object);
 148 \f
 149 /* Whether the syntax of the character C has the prefix flag set.  */
 150 int syntax_prefix_flag_p (int c)
 151 {
 152   return SYNTAX_PREFIX (c);
 153 }
 154
  155 struct gl_state_s gl_state;             /* Global state of syntax parser.  */
  156
  157 #define INTERVALS_AT_ONCE 10            /* 1 + max-number of intervals
  158                                            to scan to property-change.  */
  159
  160 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
  161    sign of COUNT gives the relative position of CHARPOS wrt the previously
  162    valid interval.  If INIT, only [be]_property fields of gl_state are
  163    valid at start, the rest is filled in based on OBJECT.
  164
  165    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
  166    direction than the intervals - or in an interval.  We update the
  167    current syntax-table based on the property of this interval, and
  168    update the interval to start further than CHARPOS - or be
  169    NULL_INTERVAL.  We also update [be]_property to be the next value of
  170    charpos to call this subroutine again - or be before/after the
  171    start/end of OBJECT.  */
  172
  173 void
  174 update_syntax_table (EMACS_INT charpos, int count, int init,
  175                      Lisp_Object object)
  176 {
  177   Lisp_Object tmp_table;
  178   int cnt = 0, invalidate = 1;
  179   INTERVAL i;
  180   /* INIT: start afresh from the interval of OBJECT that holds CHARPOS.  */
  181   if (init)
  182     {
  183       gl_state.old_prop = Qnil;
  184       gl_state.start = gl_state.b_property;
  185       gl_state.stop = gl_state.e_property;
  186       i = interval_of (charpos, object);
  187       gl_state.backward_i = gl_state.forward_i = i;
  188       invalidate = 0;
  189       if (NULL_INTERVAL_P (i))
  190         return;         /* No interval at CHARPOS: nothing more to set up.  */
  191       /* interval_of updates only ->position of the return value, so
  192          update the parents manually to speed up update_interval.  */
  193       while (!NULL_PARENT (i))
  194         {
  195           if (AM_RIGHT_CHILD (i))
  196             INTERVAL_PARENT (i)->position = i->position
  197               - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
  198               - TOTAL_LENGTH (INTERVAL_PARENT (i))
  199               + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
  200           else
  201             INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
  202               + TOTAL_LENGTH (i);
  203           i = INTERVAL_PARENT (i);
  204         }
  205       i = gl_state.forward_i;
  206       gl_state.b_property = i->position - gl_state.offset;
  207       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
  208       goto update;      /* Skip the incremental-move logic below.  */
  209     }
  210   i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
  211
  212   /* We are guaranteed to be called with CHARPOS either in i,
  213      or further off.  */
  214   if (NULL_INTERVAL_P (i))
  215     error ("Error in syntax_table logic for to-the-end intervals");
  216   else if (charpos < i->position)               /* Move left.  */
  217     {
  218       if (count > 0)
  219         error ("Error in syntax_table logic for intervals <-");
  220       /* Update the interval.  */
  221       i = update_interval (i, charpos);
  222       if (INTERVAL_LAST_POS (i) != gl_state.b_property)
  223         {
  224           invalidate = 0;
  225           gl_state.forward_i = i;
  226           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
  227         }
  228     }
  229   else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
  230     {
  231       if (count < 0)
  232         error ("Error in syntax_table logic for intervals ->");
  233       /* Update the interval.  */
  234       i = update_interval (i, charpos);
  235       if (i->position != gl_state.e_property)
  236         {
  237           invalidate = 0;
  238           gl_state.backward_i = i;
  239           gl_state.b_property = i->position - gl_state.offset;
  240         }
  241     }
  242
  243   update:
  244   tmp_table = textget (i->plist, Qsyntax_table);
  245
  246   if (invalidate)
  247     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
  248
  249   if (invalidate)               /* Did not get to adjacent interval.  */
  250     {                           /* with the same table => */
  251                                 /* invalidate the old range.  */
  252       if (count > 0)
  253         {
  254           gl_state.backward_i = i;
  255           gl_state.b_property = i->position - gl_state.offset;
  256         }
  257       else
  258         {
  259           gl_state.forward_i = i;
  260           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
  261         }
  262     }
  263   /* Switch tables only when the property value actually changed.  */
  264   if (!EQ (tmp_table, gl_state.old_prop))
  265     {
  266       gl_state.current_syntax_table = tmp_table;
  267       gl_state.old_prop = tmp_table;
  268       if (EQ (Fsyntax_table_p (tmp_table), Qt))
  269         {
  270           gl_state.use_global = 0;      /* Look chars up in TMP_TABLE.  */
  271         }
  272       else if (CONSP (tmp_table))
  273         {
  274           gl_state.use_global = 1;      /* A cons is kept as global_code.  */
  275           gl_state.global_code = tmp_table;
  276         }
  277       else
  278         {
  279           gl_state.use_global = 0;      /* Otherwise use the buffer's table.  */
  280           gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
  281         }
  282     }
  283   /* Extend the valid range over neighbours carrying the same property.  */
  284   while (!NULL_INTERVAL_P (i))
  285     {
  286       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
  287         {
  288           if (count > 0)
  289             {
  290               gl_state.e_property = i->position - gl_state.offset;
  291               gl_state.forward_i = i;
  292             }
  293           else
  294             {
  295               gl_state.b_property
  296                 = i->position + LENGTH (i) - gl_state.offset;
  297               gl_state.backward_i = i;
  298             }
  299           return;
  300         }
  301       else if (cnt == INTERVALS_AT_ONCE)
  302         {
  303           if (count > 0)
  304             {
  305               gl_state.e_property
  306                 = i->position + LENGTH (i) - gl_state.offset
  307                 /* e_property at EOB is not set to ZV but to ZV+1, so that
  308                    we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
  309                    having to check eob between the two.  */
  310                 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
  311               gl_state.forward_i = i;
  312             }
  313           else
  314             {
  315               gl_state.b_property = i->position - gl_state.offset;
  316               gl_state.backward_i = i;
  317             }
  318           return;
  319         }
  320       cnt++;
  321       i = count > 0 ? next_interval (i) : previous_interval (i);
  322     }
  323   eassert (NULL_INTERVAL_P (i)); /* This property goes to the end.  */
  324   if (count > 0)
  325     gl_state.e_property = gl_state.stop;
  326   else
  327     gl_state.b_property = gl_state.start;
  328 }
 329 \f
 330 /* Returns TRUE if char at CHARPOS is quoted.
 331    Global syntax-table data should be set up already to be good at CHARPOS
 332    or after.  On return global syntax data is good for lookup at CHARPOS. */
 333
 334 static int
 335 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
 336 {
 337   register enum syntaxcode code;
 338   register EMACS_INT beg = BEGV;
 339   register int quoted = 0;
 340   EMACS_INT orig = charpos;
 341
 342   while (charpos > beg)
 343     {
 344       int c;
 345       DEC_BOTH (charpos, bytepos);
 346
 347       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
 348       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
 349       code = SYNTAX (c);
 350       if (! (code == Scharquote || code == Sescape))
 351         break;
 352
 353       quoted = !quoted;
 354     }
 355
 356   UPDATE_SYNTAX_TABLE (orig);
 357   return quoted;
 358 }
 359
 360 /* Return the bytepos one character before BYTEPOS.
 361    We assume that BYTEPOS is not at the start of the buffer.  */
 362
 363 static INLINE EMACS_INT
 364 dec_bytepos (EMACS_INT bytepos)
 365 {
 366   if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
 367     return bytepos - 1;
 368
 369   DEC_POS (bytepos);
 370   return bytepos;
 371 }
 372 \f
 373 /* Return a defun-start position before POS and not too far before.
 374    It should be the last one before POS, or nearly the last.
 375
 376    When open_paren_in_column_0_is_defun_start is nonzero,
 377    only the beginning of the buffer is treated as a defun-start.
 378
 379    We record the information about where the scan started
 380    and what its result was, so that another call in the same area
 381    can return the same value very quickly.
 382
 383    There is no promise at which position the global syntax data is
 384    valid on return from the subroutine, so the caller should explicitly
 385    update the global data.  */
 386
 387 static EMACS_INT
 388 find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
 389 {
 390   EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
 391
 392   if (!open_paren_in_column_0_is_defun_start)
 393     {
 394       find_start_value_byte = BEGV_BYTE;
 395       return BEGV;
 396     }
 397
 398   /* Use previous finding, if it's valid and applies to this inquiry.  */
 399   if (current_buffer == find_start_buffer
 400       /* Reuse the defun-start even if POS is a little farther on.
 401          POS might be in the next defun, but that's ok.
 402          Our value may not be the best possible, but will still be usable.  */
 403       && pos <= find_start_pos + 1000
 404       && pos >= find_start_value
 405       && BEGV == find_start_begv
 406       && MODIFF == find_start_modiff)
 407     return find_start_value;
 408
 409   /* Back up to start of line.  */
 410   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
 411
 412   /* We optimize syntax-table lookup for rare updates.  Thus we accept
 413      only those `^\s(' which are good in global _and_ text-property
 414      syntax-tables.  */
 415   SETUP_BUFFER_SYNTAX_TABLE ();
 416   while (PT > BEGV)
 417     {
 418       int c;
 419
 420       /* Open-paren at start of line means we may have found our
 421          defun-start.  */
 422       c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 423       if (SYNTAX (c) == Sopen)
 424         {
 425           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
 426           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 427           if (SYNTAX (c) == Sopen)
 428             break;
 429           /* Now fallback to the default value.  */
 430           SETUP_BUFFER_SYNTAX_TABLE ();
 431         }
 432       /* Move to beg of previous line.  */
 433       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
 434     }
 435
 436   /* Record what we found, for the next try.  */
 437   find_start_value = PT;
 438   find_start_value_byte = PT_BYTE;
 439   find_start_buffer = current_buffer;
 440   find_start_modiff = MODIFF;
 441   find_start_begv = BEGV;
 442   find_start_pos = pos;
 443
 444   TEMP_SET_PT_BOTH (opoint, opoint_byte);
 445
 446   return find_start_value;
 447 }
 448 \f
 449 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
 450
 451 static int
 452 prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
 453 {
 454   int c, val;
 455
 456   DEC_BOTH (pos, pos_byte);
 457   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 458   c = FETCH_CHAR (pos_byte);
 459   val = SYNTAX_COMEND_FIRST (c);
 460   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 461   return val;
 462 }
 463
 464 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE.  */
 465
 466 /* static int
 467  * prev_char_comstart_first (pos, pos_byte)
 468  *      int pos, pos_byte;
 469  * {
 470  *   int c, val;
 471  *
 472  *   DEC_BOTH (pos, pos_byte);
 473  *   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 474  *   c = FETCH_CHAR (pos_byte);
 475  *   val = SYNTAX_COMSTART_FIRST (c);
 476  *   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 477  *   return val;
 478  * } */
 479
  480 /* Check whether charpos FROM is at the end of a comment of style
  481    COMSTYLE, nestable iff COMNESTED is nonzero.  FROM_BYTE is the bytepos
  482    corresponding to FROM.  Do not move back before STOP.
  483
  484    Return a positive value (the comment's starting charpos, converted to
  485    int) if we find a comment ending at FROM/FROM_BYTE; return -1 otherwise.
  486
  487    Store a charpos into *CHARPOS_PTR and the matching bytepos into
  488    *BYTEPOS_PTR: the comment's beginning if successful, else FROM.
  489
  490    Global syntax data remains valid for backward search starting at
  491    the returned value (or at FROM, if the search was not successful).  */
  492
  493 static int
  494 back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
  495 {
  496   /* Look back, counting the parity of string-quotes,
  497      and recording the comment-starters seen.
  498      When we reach a safe place, assume that's not in a string;
  499      then step the main scan to the earliest comment-starter seen
  500      an even number of string quotes away from the safe place.
  501
  502      COMSTART_POS is the earliest matching comment-starter seen so far
  503      (0 if none).  STRING_STYLE is -1 while the string quotes seen so far
  504      pair up, else the delimiter of the quote still unpaired.  */
  505   int string_style = -1;        /* Presumed outside of any string. */
  506   int string_lossage = 0;
  507   /* Not a real lossage: indicates that we have passed a matching comment
  508      starter plus a non-matching comment-ender, meaning that any matching
  509      comment-starter we might see later could be a false positive (hidden
  510      inside another comment).
  511      Test case:  { a (* b } c (* d *) */
  512   int comment_lossage = 0;
  513   EMACS_INT comment_end = from;
  514   EMACS_INT comment_end_byte = from_byte;
  515   EMACS_INT comstart_pos = 0;
  516   EMACS_INT comstart_byte IF_LINT (= 0);
  517   /* Place where the containing defun starts,
  518      or 0 if we didn't come across it yet.  */
  519   EMACS_INT defun_start = 0;
  520   EMACS_INT defun_start_byte = 0;
  521   register enum syntaxcode code;
  522   int nesting = 1;              /* current comment nesting */
  523   int c;
  524   int syntax = 0;
  525
  526   /* FIXME: A }} comment-ender style leads to incorrect behavior
  527      in the case of {{ c }}} because we ignore the last two chars which are
  528      assumed to be comment-enders although they aren't.  */
  529
  530   /* At beginning of range to scan, we're outside of strings;
  531      that determines quote parity to the comment-end.  */
  532   while (from != stop)
  533     {
  534       EMACS_INT temp_byte;
  535       int prev_syntax, com2start, com2end;
  536       int comstart;
  537
  538       /* Move back and examine a character.  */
  539       DEC_BOTH (from, from_byte);
  540       UPDATE_SYNTAX_TABLE_BACKWARD (from);
  541       /* PREV_SYNTAX: flags of the char at FROM + 1 (0 on the first step).  */
  542       prev_syntax = syntax;
  543       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  544       syntax = SYNTAX_WITH_FLAGS (c);
  545       code = SYNTAX (c);
  546
  547       /* Check for 2-char comment markers.  */
  548       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
  549                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
  550                    && (comstyle
  551                        == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
  552                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
  553                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
  554       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
  555                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
  556       comstart = (com2start || code == Scomment);
  557
  558       /* Nasty cases with overlapping 2-char comment markers:
  559          - snmp-mode: -- c -- foo -- c --
  560                       --- c --
  561                       ------ c --
  562          - c-mode:    *||*
  563                       |* *|* *|
  564                       |*| |* |*|
  565                       ///   */
  566
  567       /* If a 2-char comment sequence partly overlaps with another,
  568          we don't try to be clever.  E.g. |*| in C, or }% in modes that
  569          have %..\n and %{..}%.  */
  570       if (from > stop && (com2end || comstart))
  571         {
  572           EMACS_INT next = from, next_byte = from_byte;
  573           int next_c, next_syntax;
  574           DEC_BOTH (next, next_byte);
  575           UPDATE_SYNTAX_TABLE_BACKWARD (next);
  576           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
  577           next_syntax = SYNTAX_WITH_FLAGS (next_c);
  578           if (((comstart || comnested)
  579                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
  580                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
  581               || ((com2end || comnested)
  582                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
  583                   && (comstyle
  584                       == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
  585                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
  586             goto lossage;
  587           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
  588         }
  589
  590       if (com2start && comstart_pos == 0)
  591         /* We're looking at a comment starter.  But it might be a comment
  592            ender as well (see snmp-mode).  The first time we see one, we
  593            need to consider it as a comment starter,
  594            and the subsequent times as a comment ender.  */
  595         com2end = 0;
  596
  597       /* Turn a 2-char comment sequences into the appropriate syntax.  */
  598       if (com2end)
  599         code = Sendcomment;
  600       else if (com2start)
  601         code = Scomment;
  602       /* Ignore comment starters of a different style.  */
  603       else if (code == Scomment
  604                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
  605                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
  606         continue;
  607
  608       /* Ignore escaped characters, except comment-enders.  */
  609       if (code != Sendcomment && char_quoted (from, from_byte))
  610         continue;
  611
  612       switch (code)
  613         {
  614         case Sstring_fence:
  615         case Scomment_fence:
  616           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);  /* Fall through.  */
  617         case Sstring:
  618           /* Track parity of quotes.  */
  619           if (string_style == -1)
  620             /* Entering a string.  */
  621             string_style = c;
  622           else if (string_style == c)
  623             /* Leaving the string.  */
  624             string_style = -1;
  625           else
  626             /* If we have two kinds of string delimiters.
  627                There's no way to grok this scanning backwards.  */
  628             string_lossage = 1;
  629           break;
  630
  631         case Scomment:
  632           /* We've already checked that it is the relevant comstyle.  */
  633           if (string_style != -1 || comment_lossage || string_lossage)
  634             /* There are odd string quotes involved, so let's be careful.
  635                Test case in Pascal: " { " a { " } */
  636             goto lossage;
  637
  638           if (!comnested)
  639             {
  640               /* Record best comment-starter so far.  */
  641               comstart_pos = from;
  642               comstart_byte = from_byte;
  643             }
  644           else if (--nesting <= 0)
  645             /* nested comments have to be balanced, so we don't need to
  646                keep looking for earlier ones.  We use here the same (slightly
  647                incorrect) reasoning as below:  since it is followed by uniform
  648                paired string quotes, this comment-start has to be outside of
  649                strings, else the comment-end itself would be inside a string. */
  650             goto done;
  651           break;
  652
  653         case Sendcomment:
  654           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
  655               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
  656                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
  657             /* This is the same style of comment ender as ours. */
  658             {
  659               if (comnested)
  660                 nesting++;
  661               else
  662                 /* Anything before that can't count because it would match
  663                    this comment-ender rather than ours.  */
  664                 from = stop;    /* Break out of the loop.  */
  665             }
  666           else if (comstart_pos != 0 || c != '\n')
  667             /* We're mixing comment styles here, so we'd better be careful.
  668                The (comstart_pos != 0 || c != '\n') check is not quite correct
  669                (we should just always set comment_lossage), but removing it
  670                would imply that any multiline comment in C would go through
  671                lossage, which seems overkill.
  672                The failure should only happen in the rare cases such as
  673                  { (* } *)   */
  674             comment_lossage = 1;
  675           break;
  676
  677         case Sopen:
  678           /* Assume a defun-start point is outside of strings.  */
  679           if (open_paren_in_column_0_is_defun_start
  680               && (from == stop
  681                   || (temp_byte = dec_bytepos (from_byte),
  682                       FETCH_CHAR (temp_byte) == '\n')))
  683             {
  684               defun_start = from;
  685               defun_start_byte = from_byte;
  686               from = stop;      /* Break out of the loop.  */
  687             }
  688           break;
  689
  690         default:
  691           break;
  692         }
  693     }
  694   /* No comment starter recorded: report failure at the original position.  */
  695   if (comstart_pos == 0)
  696     {
  697       from = comment_end;
  698       from_byte = comment_end_byte;
  699       UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
  700     }
  701   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
  702      or `done'), then we've found the beginning of the non-nested comment.  */
  703   else if (1)   /* !comnested */
  704     {
  705       from = comstart_pos;
  706       from_byte = comstart_byte;
  707       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
  708     }
  709   else
  710     {
  711       struct lisp_parse_state state;
  712     lossage:    /* Entered only by goto: the `else if (1)' never falls in.  */
  713       /* We had two kinds of string delimiters mixed up
  714          together.  Decode this going forwards.
  715          Scan fwd from a known safe place (beginning-of-defun)
  716          to the one in question; this records where we
  717          last passed a comment starter.  */
  718       /* If we did not already find the defun start, find it now.  */
  719       if (defun_start == 0)
  720         {
  721           defun_start = find_defun_start (comment_end, comment_end_byte);
  722           defun_start_byte = find_start_value_byte;
  723         }
  724       do
  725         {
  726           scan_sexps_forward (&state,
  727                               defun_start, defun_start_byte,
  728                               comment_end, -10000, 0, Qnil, 0);
  729           defun_start = comment_end;
  730           if (state.incomment == (comnested ? 1 : -1)
  731               && state.comstyle == comstyle)
  732             from = state.comstr_start;
  733           else
  734             {
  735               from = comment_end;
  736               if (state.incomment)
  737                 /* If comment_end is inside some other comment, maybe ours
  738                    is nested, so we need to try again from within the
  739                    surrounding comment.  Example: { a (* " *)  */
  740                 {
  741                   /* FIXME: We should advance by one or two chars. */
  742                   defun_start = state.comstr_start + 2;
  743                   defun_start_byte = CHAR_TO_BYTE (defun_start);
  744                 }
  745             }
  746         } while (defun_start < comment_end);
  747
  748       from_byte = CHAR_TO_BYTE (from);
  749       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
  750     }
  751
  752  done:
  753   *charpos_ptr = from;
  754   *bytepos_ptr = from_byte;
  755   /* NOTE(review): FROM (an EMACS_INT) is narrowed to the int return type.  */
  756   return (from == comment_end) ? -1 : from;
  757 }
 758 \f
 759 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
 760        doc: /* Return t if OBJECT is a syntax table.
 761 Currently, any char-table counts as a syntax table.  */)
 762   (Lisp_Object object)
 763 {
 764   if (CHAR_TABLE_P (object)
 765       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
 766     return Qt;
 767   return Qnil;
 768 }
 769
 770 static void
 771 check_syntax_table (Lisp_Object obj)
 772 {
 773   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
 774               Qsyntax_table_p, obj);
 775 }
 776
  777 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
  778        doc: /* Return the current syntax table.
  779 This is the one specified by the current buffer.  */)
  780   (void)
  781 {
  782   return BVAR (current_buffer, syntax_table);  /* Slot set by Fset_syntax_table.  */
  783 }
 784
 785 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
 786    Sstandard_syntax_table, 0, 0, 0,
 787        doc: /* Return the standard syntax table.
 788 This is the one used for new buffers.  */)
 789   (void)
 790 {
 791   return Vstandard_syntax_table;
 792 }
 793
 794 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
 795        doc: /* Construct a new syntax table and return it.
 796 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
 797   (Lisp_Object table)
 798 {
 799   Lisp_Object copy;
 800
 801   if (!NILP (table))
 802     check_syntax_table (table);
 803   else
 804     table = Vstandard_syntax_table;
 805
 806   copy = Fcopy_sequence (table);
 807
 808   /* Only the standard syntax table should have a default element.
 809      Other syntax tables should inherit from parents instead.  */
 810   XCHAR_TABLE (copy)->defalt = Qnil;
 811
 812   /* Copied syntax tables should all have parents.
 813      If we copied one with no parent, such as the standard syntax table,
 814      use the standard syntax table as the copy's parent.  */
 815   if (NILP (XCHAR_TABLE (copy)->parent))
 816     Fset_char_table_parent (copy, Vstandard_syntax_table);
 817   return copy;
 818 }
 819
 820 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
 821        doc: /* Select a new syntax table for the current buffer.
 822 One argument, a syntax table.  */)
 823   (Lisp_Object table)
 824 {
 825   int idx;
 826   check_syntax_table (table);
 827   BVAR (current_buffer, syntax_table) = table;
 828   /* Indicate that this buffer now has a specified syntax table.  */
 829   idx = PER_BUFFER_VAR_IDX (syntax_table);
 830   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
 831   return table;
 832 }
 833 \f
 834 /* Convert a letter which signifies a syntax code
 835  into the code it signifies.
 836  This is used by modify-syntax-entry, and other things.  */
 837
 838 unsigned char syntax_spec_code[0400] =
 839   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 840     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 841     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 842     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 843     (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
 844         (char) Smath, 0377, 0377, (char) Squote,
 845     (char) Sopen, (char) Sclose, 0377, 0377,
 846         0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
 847     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 848     0377, 0377, 0377, 0377,
 849         (char) Scomment, 0377, (char) Sendcomment, 0377,
 850     (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
 851     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 852     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 853     0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
 854     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
 855     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 856     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 857     0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
 858   };
 859
 860 /* Indexed by syntax code, give the letter that describes it.  */
 861
 862 char syntax_code_spec[16] =
 863   {
 864     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
 865     '!', '|'
 866   };
 867
 868 /* Indexed by syntax code, give the object (cons of syntax code and
 869    nil) to be stored in syntax table.  Since these objects can be
 870    shared among syntax tables, we generate them in advance.  By
 871    sharing objects, the function `describe-syntax' can give a more
 872    compact listing.  */
 873 static Lisp_Object Vsyntax_code_object;
 874
 875 \f
 876 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
 877        doc: /* Return the syntax code of CHARACTER, described by a character.
 878 For example, if CHARACTER is a word constituent, the
 879 character `w' (119) is returned.
 880 The characters that correspond to various syntax codes
 881 are listed in the documentation of `modify-syntax-entry'.  */)
 882   (Lisp_Object character)
 883 {
 884   int char_int;
 885   CHECK_CHARACTER (character);
 886   char_int = XINT (character);
 887   SETUP_BUFFER_SYNTAX_TABLE ();
 888   return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
 889 }
 890
 891 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
 892        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
 893   (Lisp_Object character)
 894 {
 895   int char_int, code;
 896   CHECK_NUMBER (character);
 897   char_int = XINT (character);
 898   SETUP_BUFFER_SYNTAX_TABLE ();
 899   code = SYNTAX (char_int);
 900   if (code == Sopen || code == Sclose)
 901     return SYNTAX_MATCH (char_int);
 902   return Qnil;
 903 }
 904
 905 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
 906        doc: /* Convert a syntax specification STRING into syntax cell form.
 907 STRING should be a string as it is allowed as argument of
 908 `modify-syntax-entry'.  Value is the equivalent cons cell
 909 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
 910 text property.  */)
 911   (Lisp_Object string)
 912 {
 913   register const unsigned char *p;
 914   register enum syntaxcode code;
 915   int val;
 916   Lisp_Object match;
 917
 918   CHECK_STRING (string);
 919
 920   p = SDATA (string);
 921   code = (enum syntaxcode) syntax_spec_code[*p++];
 922   if (((int) code & 0377) == 0377)
 923     error ("Invalid syntax description letter: %c", p[-1]);
 924
 925   if (code == Sinherit)
 926     return Qnil;
 927
 928   if (*p)
 929     {
 930       int len;
 931       int character = STRING_CHAR_AND_LENGTH (p, len);
 932       XSETINT (match, character);
 933       if (XFASTINT (match) == ' ')
 934         match = Qnil;
 935       p += len;
 936     }
 937   else
 938     match = Qnil;
 939
 940   val = (int) code;
 941   while (*p)
 942     switch (*p++)
 943       {
 944       case '1':
 945         val |= 1 << 16;
 946         break;
 947
 948       case '2':
 949         val |= 1 << 17;
 950         break;
 951
 952       case '3':
 953         val |= 1 << 18;
 954         break;
 955
 956       case '4':
 957         val |= 1 << 19;
 958         break;
 959
 960       case 'p':
 961         val |= 1 << 20;
 962         break;
 963
 964       case 'b':
 965         val |= 1 << 21;
 966         break;
 967
 968       case 'n':
 969         val |= 1 << 22;
 970         break;
 971
 972       case 'c':
 973         val |= 1 << 23;
 974         break;
 975       }
 976
 977   if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
 978     return XVECTOR (Vsyntax_code_object)->contents[val];
 979   else
 980     /* Since we can't use a shared object, let's make a new one.  */
 981     return Fcons (make_number (val), match);
 982 }
 983
 984 /* I really don't know why this is interactive
 985    help-form should at least be made useful whilst reading the second arg.  */
 986 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
 987   "cSet syntax for character: \nsSet syntax for %s to: ",
 988        doc: /* Set syntax for character CHAR according to string NEWENTRY.
 989 The syntax is changed only for table SYNTAX-TABLE, which defaults to
 990  the current buffer's syntax table.
 991 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
 992 in the range MIN to MAX are changed.
 993 The first character of NEWENTRY should be one of the following:
 994   Space or -  whitespace syntax.    w   word constituent.
 995   _           symbol constituent.   .   punctuation.
 996   (           open-parenthesis.     )   close-parenthesis.
 997   "           string quote.         \\   escape.
 998   $           paired delimiter.     '   expression quote or prefix operator.
 999   <           comment starter.      >   comment ender.
1000   /           character-quote.      @   inherit from `standard-syntax-table'.
1001   |           generic string fence. !   generic comment fence.
1002
1003 Only single-character comment start and end sequences are represented thus.
1004 Two-character sequences are represented as described below.
1005 The second character of NEWENTRY is the matching parenthesis,
1006  used only if the first character is `(' or `)'.
1007 Any additional characters are flags.
1008 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1009  1 means CHAR is the start of a two-char comment start sequence.
1010  2 means CHAR is the second character of such a sequence.
1011  3 means CHAR is the start of a two-char comment end sequence.
1012  4 means CHAR is the second character of such a sequence.
1013
1014 There can be several orthogonal comment sequences.  This is to support
1015 language modes such as C++.  By default, all comment sequences are of style
1016 a, but you can set the comment sequence style to b (on the second character
1017 of a comment-start, and the first character of a comment-end sequence) and/or
1018 c (on any of its chars) using this flag:
1019  b means CHAR is part of comment sequence b.
1020  c means CHAR is part of comment sequence c.
1021  n means CHAR is part of a nestable comment sequence.
1022
1023  p means CHAR is a prefix character for `backward-prefix-chars';
1024    such characters are treated as whitespace when they occur
1025    between expressions.
1026 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1027   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1028 {
1029   if (CONSP (c))
1030     {
1031       CHECK_CHARACTER_CAR (c);
1032       CHECK_CHARACTER_CDR (c);
1033     }
1034   else
1035     CHECK_CHARACTER (c);
1036
1037   if (NILP (syntax_table))
1038     syntax_table = BVAR (current_buffer, syntax_table);
1039   else
1040     check_syntax_table (syntax_table);
1041
1042   newentry = Fstring_to_syntax (newentry);
1043   if (CONSP (c))
1044     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1045   else
1046     SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1047
1048   /* We clear the regexp cache, since character classes can now have
1049      different values from those in the compiled regexps.*/
1050   clear_regexp_cache ();
1051
1052   return Qnil;
1053 }
1054 \f
1055 /* Dump syntax table to buffer in human-readable format */
1056
1057 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1058        Sinternal_describe_syntax_value, 1, 1, 0,
1059        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1060   (Lisp_Object syntax)
1061 {
1062   register enum syntaxcode code;
1063   int syntax_code;
1064   char desc, start1, start2, end1, end2, prefix,
1065     comstyleb, comstylec, comnested;
1066   char str[2];
1067   Lisp_Object first, match_lisp, value = syntax;
1068
1069   if (NILP (value))
1070     {
1071       insert_string ("default");
1072       return syntax;
1073     }
1074
1075   if (CHAR_TABLE_P (value))
1076     {
1077       insert_string ("deeper char-table ...");
1078       return syntax;
1079     }
1080
1081   if (!CONSP (value))
1082     {
1083       insert_string ("invalid");
1084       return syntax;
1085     }
1086
1087   first = XCAR (value);
1088   match_lisp = XCDR (value);
1089
1090   if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1091     {
1092       insert_string ("invalid");
1093       return syntax;
1094     }
1095
1096   syntax_code = XINT (first);
1097   code = (enum syntaxcode) (syntax_code & 0377);
1098   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1099   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1100   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1101   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1102   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1103   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1104   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1105   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1106
1107   if ((int) code < 0 || (int) code >= (int) Smax)
1108     {
1109       insert_string ("invalid");
1110       return syntax;
1111     }
1112   desc = syntax_code_spec[(int) code];
1113
1114   str[0] = desc, str[1] = 0;
1115   insert (str, 1);
1116
1117   if (NILP (match_lisp))
1118     insert (" ", 1);
1119   else
1120     insert_char (XINT (match_lisp));
1121
1122   if (start1)
1123     insert ("1", 1);
1124   if (start2)
1125     insert ("2", 1);
1126
1127   if (end1)
1128     insert ("3", 1);
1129   if (end2)
1130     insert ("4", 1);
1131
1132   if (prefix)
1133     insert ("p", 1);
1134   if (comstyleb)
1135     insert ("b", 1);
1136   if (comstylec)
1137     insert ("c", 1);
1138   if (comnested)
1139     insert ("n", 1);
1140
1141   insert_string ("\twhich means: ");
1142
1143   switch (SWITCH_ENUM_CAST (code))
1144     {
1145     case Swhitespace:
1146       insert_string ("whitespace"); break;
1147     case Spunct:
1148       insert_string ("punctuation"); break;
1149     case Sword:
1150       insert_string ("word"); break;
1151     case Ssymbol:
1152       insert_string ("symbol"); break;
1153     case Sopen:
1154       insert_string ("open"); break;
1155     case Sclose:
1156       insert_string ("close"); break;
1157     case Squote:
1158       insert_string ("prefix"); break;
1159     case Sstring:
1160       insert_string ("string"); break;
1161     case Smath:
1162       insert_string ("math"); break;
1163     case Sescape:
1164       insert_string ("escape"); break;
1165     case Scharquote:
1166       insert_string ("charquote"); break;
1167     case Scomment:
1168       insert_string ("comment"); break;
1169     case Sendcomment:
1170       insert_string ("endcomment"); break;
1171     case Sinherit:
1172       insert_string ("inherit"); break;
1173     case Scomment_fence:
1174       insert_string ("comment fence"); break;
1175     case Sstring_fence:
1176       insert_string ("string fence"); break;
1177     default:
1178       insert_string ("invalid");
1179       return syntax;
1180     }
1181
1182   if (!NILP (match_lisp))
1183     {
1184       insert_string (", matches ");
1185       insert_char (XINT (match_lisp));
1186     }
1187
1188   if (start1)
1189     insert_string (",\n\t  is the first character of a comment-start sequence");
1190   if (start2)
1191     insert_string (",\n\t  is the second character of a comment-start sequence");
1192
1193   if (end1)
1194     insert_string (",\n\t  is the first character of a comment-end sequence");
1195   if (end2)
1196     insert_string (",\n\t  is the second character of a comment-end sequence");
1197   if (comstyleb)
1198     insert_string (" (comment style b)");
1199   if (comstylec)
1200     insert_string (" (comment style c)");
1201   if (comnested)
1202     insert_string (" (nestable)");
1203
1204   if (prefix)
1205     insert_string (",\n\t  is a prefix character for `backward-prefix-chars'");
1206
1207   return syntax;
1208 }
1209 \f
1210 /* Return the position across COUNT words from FROM.
1211    If that many words cannot be found before the end of the buffer, return 0.
1212    COUNT negative means scan backward and stop at word beginning.  */
1213
1214 EMACS_INT
1215 scan_words (register EMACS_INT from, register EMACS_INT count)
1216 {
1217   register EMACS_INT beg = BEGV;
1218   register EMACS_INT end = ZV;
1219   register EMACS_INT from_byte = CHAR_TO_BYTE (from);
1220   register enum syntaxcode code;
1221   int ch0, ch1;
1222   Lisp_Object func, pos;
1223
1224   immediate_quit = 1;
1225   QUIT;
1226
1227   SETUP_SYNTAX_TABLE (from, count);
1228
1229   while (count > 0)
1230     {
1231       while (1)
1232         {
1233           if (from == end)
1234             {
1235               immediate_quit = 0;
1236               return 0;
1237             }
1238           UPDATE_SYNTAX_TABLE_FORWARD (from);
1239           ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1240           code = SYNTAX (ch0);
1241           INC_BOTH (from, from_byte);
1242           if (words_include_escapes
1243               && (code == Sescape || code == Scharquote))
1244             break;
1245           if (code == Sword)
1246             break;
1247         }
1248       /* Now CH0 is a character which begins a word and FROM is the
1249          position of the next character.  */
1250       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1251       if (! NILP (Ffboundp (func)))
1252         {
1253           pos = call2 (func, make_number (from - 1), make_number (end));
1254           if (INTEGERP (pos) && XINT (pos) > from)
1255             {
1256               from = XINT (pos);
1257               from_byte = CHAR_TO_BYTE (from);
1258             }
1259         }
1260       else
1261         {
1262           while (1)
1263             {
1264               if (from == end) break;
1265               UPDATE_SYNTAX_TABLE_FORWARD (from);
1266               ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1267               code = SYNTAX (ch1);
1268               if ((code != Sword
1269                    && (! words_include_escapes
1270                        || (code != Sescape && code != Scharquote)))
1271                   || word_boundary_p (ch0, ch1))
1272                 break;
1273               INC_BOTH (from, from_byte);
1274               ch0 = ch1;
1275             }
1276         }
1277       count--;
1278     }
1279   while (count < 0)
1280     {
1281       while (1)
1282         {
1283           if (from == beg)
1284             {
1285               immediate_quit = 0;
1286               return 0;
1287             }
1288           DEC_BOTH (from, from_byte);
1289           UPDATE_SYNTAX_TABLE_BACKWARD (from);
1290           ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1291           code = SYNTAX (ch1);
1292           if (words_include_escapes
1293               && (code == Sescape || code == Scharquote))
1294             break;
1295           if (code == Sword)
1296             break;
1297         }
1298       /* Now CH1 is a character which ends a word and FROM is the
1299          position of it.  */
1300       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1301       if (! NILP (Ffboundp (func)))
1302         {
1303           pos = call2 (func, make_number (from), make_number (beg));
1304           if (INTEGERP (pos) && XINT (pos) < from)
1305             {
1306               from = XINT (pos);
1307               from_byte = CHAR_TO_BYTE (from);
1308             }
1309         }
1310       else
1311         {
1312           while (1)
1313             {
1314               if (from == beg)
1315                 break;
1316               DEC_BOTH (from, from_byte);
1317               UPDATE_SYNTAX_TABLE_BACKWARD (from);
1318               ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1319               code = SYNTAX (ch0);
1320               if ((code != Sword
1321                    && (! words_include_escapes
1322                        || (code != Sescape && code != Scharquote)))
1323                   || word_boundary_p (ch0, ch1))
1324                 {
1325                   INC_BOTH (from, from_byte);
1326                   break;
1327                 }
1328               ch1 = ch0;
1329             }
1330         }
1331       count++;
1332     }
1333
1334   immediate_quit = 0;
1335
1336   return from;
1337 }
1338
1339 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1340        doc: /* Move point forward ARG words (backward if ARG is negative).
1341 Normally returns t.
1342 If an edge of the buffer or a field boundary is reached, point is left there
1343 and the function returns nil.  Field boundaries are not noticed if
1344 `inhibit-field-text-motion' is non-nil.  */)
1345   (Lisp_Object arg)
1346 {
1347   Lisp_Object tmp;
1348   int orig_val, val;
1349
1350   if (NILP (arg))
1351     XSETFASTINT (arg, 1);
1352   else
1353     CHECK_NUMBER (arg);
1354
1355   val = orig_val = scan_words (PT, XINT (arg));
1356   if (! orig_val)
1357     val = XINT (arg) > 0 ? ZV : BEGV;
1358
1359   /* Avoid jumping out of an input field.  */
1360   tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1361                              Qt, Qnil, Qnil);
1362   val = XFASTINT (tmp);
1363
1364   SET_PT (val);
1365   return val == orig_val ? Qt : Qnil;
1366 }
1367 \f
1368 Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
1369
1370 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1371        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1372 STRING is like the inside of a `[...]' in a regular expression
1373 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1374  (but not at the end of a range; quoting is never needed there).
1375 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1376 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1377 Char classes, e.g. `[:alpha:]', are supported.
1378
1379 Returns the distance traveled, either zero or positive.  */)
1380   (Lisp_Object string, Lisp_Object lim)
1381 {
1382   return skip_chars (1, string, lim, 1);
1383 }
1384
1385 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1386        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1387 See `skip-chars-forward' for details.
1388 Returns the distance traveled, either zero or negative.  */)
1389   (Lisp_Object string, Lisp_Object lim)
1390 {
1391   return skip_chars (0, string, lim, 1);
1392 }
1393
1394 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1395        doc: /* Move point forward across chars in specified syntax classes.
1396 SYNTAX is a string of syntax code characters.
1397 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1398 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1399 This function returns the distance traveled, either zero or positive.  */)
1400   (Lisp_Object syntax, Lisp_Object lim)
1401 {
1402   return skip_syntaxes (1, syntax, lim);
1403 }
1404
1405 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1406        doc: /* Move point backward across chars in specified syntax classes.
1407 SYNTAX is a string of syntax code characters.
1408 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1409 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1410 This function returns the distance traveled, either zero or negative.  */)
1411   (Lisp_Object syntax, Lisp_Object lim)
1412 {
1413   return skip_syntaxes (0, syntax, lim);
1414 }
1415
1416 static Lisp_Object
1417 skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
1418 {
1419   register unsigned int c;
1420   unsigned char fastmap[0400];
1421   /* Store the ranges of non-ASCII characters.  */
1422   int *char_ranges IF_LINT (= NULL);
1423   int n_char_ranges = 0;
1424   int negate = 0;
1425   register EMACS_INT i, i_byte;
1426   /* Set to 1 if the current buffer is multibyte and the region
1427      contains non-ASCII chars.  */
1428   int multibyte;
1429   /* Set to 1 if STRING is multibyte and it contains non-ASCII
1430      chars.  */
1431   int string_multibyte;
1432   EMACS_INT size_byte;
1433   const unsigned char *str;
1434   int len;
1435   Lisp_Object iso_classes;
1436
1437   CHECK_STRING (string);
1438   iso_classes = Qnil;
1439
1440   if (NILP (lim))
1441     XSETINT (lim, forwardp ? ZV : BEGV);
1442   else
1443     CHECK_NUMBER_COERCE_MARKER (lim);
1444
1445   /* In any case, don't allow scan outside bounds of buffer.  */
1446   if (XINT (lim) > ZV)
1447     XSETFASTINT (lim, ZV);
1448   if (XINT (lim) < BEGV)
1449     XSETFASTINT (lim, BEGV);
1450
1451   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1452                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1453   string_multibyte = SBYTES (string) > SCHARS (string);
1454
1455   memset (fastmap, 0, sizeof fastmap);
1456
1457   str = SDATA (string);
1458   size_byte = SBYTES (string);
1459
1460   i_byte = 0;
1461   if (i_byte < size_byte
1462       && SREF (string, 0) == '^')
1463     {
1464       negate = 1; i_byte++;
1465     }
1466
1467   /* Find the characters specified and set their elements of fastmap.
1468      Handle backslashes and ranges specially.
1469
1470      If STRING contains non-ASCII characters, setup char_ranges for
1471      them and use fastmap only for their leading codes.  */
1472
1473   if (! string_multibyte)
1474     {
1475       int string_has_eight_bit = 0;
1476
1477       /* At first setup fastmap.  */
1478       while (i_byte < size_byte)
1479         {
1480           c = str[i_byte++];
1481
1482           if (handle_iso_classes && c == '['
1483               && i_byte < size_byte
1484               && str[i_byte] == ':')
1485             {
1486               const unsigned char *class_beg = str + i_byte + 1;
1487               const unsigned char *class_end = class_beg;
1488               const unsigned char *class_limit = str + size_byte - 2;
1489               /* Leave room for the null.  */
1490               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1491               re_wctype_t cc;
1492
1493               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1494                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1495
1496               while (class_end < class_limit
1497                      && *class_end >= 'a' && *class_end <= 'z')
1498                 class_end++;
1499
1500               if (class_end == class_beg
1501                   || *class_end != ':' || class_end[1] != ']')
1502                 goto not_a_class_name;
1503
1504               memcpy (class_name, class_beg, class_end - class_beg);
1505               class_name[class_end - class_beg] = 0;
1506
1507               cc = re_wctype (class_name);
1508               if (cc == 0)
1509                 error ("Invalid ISO C character class");
1510
1511               iso_classes = Fcons (make_number (cc), iso_classes);
1512
1513               i_byte = class_end + 2 - str;
1514               continue;
1515             }
1516
1517         not_a_class_name:
1518           if (c == '\\')
1519             {
1520               if (i_byte == size_byte)
1521                 break;
1522
1523               c = str[i_byte++];
1524             }
1525           /* Treat `-' as range character only if another character
1526              follows.  */
1527           if (i_byte + 1 < size_byte
1528               && str[i_byte] == '-')
1529             {
1530               unsigned int c2;
1531
1532               /* Skip over the dash.  */
1533               i_byte++;
1534
1535               /* Get the end of the range.  */
1536               c2 = str[i_byte++];
1537               if (c2 == '\\'
1538                   && i_byte < size_byte)
1539                 c2 = str[i_byte++];
1540
1541               if (c <= c2)
1542                 {
1543                   while (c <= c2)
1544                     fastmap[c++] = 1;
1545                   if (! ASCII_CHAR_P (c2))
1546                     string_has_eight_bit = 1;
1547                 }
1548             }
1549           else
1550             {
1551               fastmap[c] = 1;
1552               if (! ASCII_CHAR_P (c))
1553                 string_has_eight_bit = 1;
1554             }
1555         }
1556
1557       /* If the current range is multibyte and STRING contains
1558          eight-bit chars, arrange fastmap and setup char_ranges for
1559          the corresponding multibyte chars.  */
1560       if (multibyte && string_has_eight_bit)
1561         {
1562           unsigned char fastmap2[0400];
1563           int range_start_byte, range_start_char;
1564
1565           memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1566           memset (fastmap + 0200, 0, 0200);
1567           /* We are sure that this loop stops.  */
1568           for (i = 0200; ! fastmap2[i]; i++);
1569           c = BYTE8_TO_CHAR (i);
1570           fastmap[CHAR_LEADING_CODE (c)] = 1;
1571           range_start_byte = i;
1572           range_start_char = c;
1573           char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1574           for (i = 129; i < 0400; i++)
1575             {
1576               c = BYTE8_TO_CHAR (i);
1577               fastmap[CHAR_LEADING_CODE (c)] = 1;
1578               if (i - range_start_byte != c - range_start_char)
1579                 {
1580                   char_ranges[n_char_ranges++] = range_start_char;
1581                   char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1582                                                   + range_start_char);
1583                   range_start_byte = i;
1584                   range_start_char = c;
1585                 }
1586             }
1587           char_ranges[n_char_ranges++] = range_start_char;
1588           char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1589                                           + range_start_char);
1590         }
1591     }
1592   else                          /* STRING is multibyte */
1593     {
1594       char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1595
1596       while (i_byte < size_byte)
1597         {
1598           unsigned char leading_code;
1599
1600           leading_code = str[i_byte];
1601           c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1602           i_byte += len;
1603
1604           if (handle_iso_classes && c == '['
1605               && i_byte < size_byte
1606               && STRING_CHAR (str + i_byte) == ':')
1607             {
1608               const unsigned char *class_beg = str + i_byte + 1;
1609               const unsigned char *class_end = class_beg;
1610               const unsigned char *class_limit = str + size_byte - 2;
1611               /* Leave room for the null.        */
1612               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1613               re_wctype_t cc;
1614
1615               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1616                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1617
1618               while (class_end < class_limit
1619                      && *class_end >= 'a' && *class_end <= 'z')
1620                 class_end++;
1621
1622               if (class_end == class_beg
1623                   || *class_end != ':' || class_end[1] != ']')
1624                 goto not_a_class_name_multibyte;
1625
1626               memcpy (class_name, class_beg, class_end - class_beg);
1627               class_name[class_end - class_beg] = 0;
1628
1629               cc = re_wctype (class_name);
1630               if (cc == 0)
1631                 error ("Invalid ISO C character class");
1632
1633               iso_classes = Fcons (make_number (cc), iso_classes);
1634
1635               i_byte = class_end + 2 - str;
1636               continue;
1637             }
1638
1639         not_a_class_name_multibyte:
1640           if (c == '\\')
1641             {
1642               if (i_byte == size_byte)
1643                 break;
1644
1645               leading_code = str[i_byte];
1646               c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1647               i_byte += len;
1648             }
1649           /* Treat `-' as range character only if another character
1650              follows.  */
1651           if (i_byte + 1 < size_byte
1652               && str[i_byte] == '-')
1653             {
1654               unsigned int c2;
1655               unsigned char leading_code2;
1656
1657               /* Skip over the dash.  */
1658               i_byte++;
1659
1660               /* Get the end of the range.  */
1661               leading_code2 = str[i_byte];
1662               c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1663               i_byte += len;
1664
1665               if (c2 == '\\'
1666                   && i_byte < size_byte)
1667                 {
1668                   leading_code2 = str[i_byte];
1669                   c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1670                   i_byte += len;
1671                 }
1672
1673               if (c > c2)
1674                 continue;
1675               if (ASCII_CHAR_P (c))
1676                 {
1677                   while (c <= c2 && c < 0x80)
1678                     fastmap[c++] = 1;
1679                   leading_code = CHAR_LEADING_CODE (c);
1680                 }
1681               if (! ASCII_CHAR_P (c))
1682                 {
1683                   while (leading_code <= leading_code2)
1684                     fastmap[leading_code++] = 1;
1685                   if (c <= c2)
1686                     {
1687                       char_ranges[n_char_ranges++] = c;
1688                       char_ranges[n_char_ranges++] = c2;
1689                     }
1690                 }
1691             }
1692           else
1693             {
1694               if (ASCII_CHAR_P (c))
1695                 fastmap[c] = 1;
1696               else
1697                 {
1698                   fastmap[leading_code] = 1;
1699                   char_ranges[n_char_ranges++] = c;
1700                   char_ranges[n_char_ranges++] = c;
1701                 }
1702             }
1703         }
1704
1705       /* If the current range is unibyte and STRING contains non-ASCII
1706          chars, arrange fastmap for the corresponding unibyte
1707          chars.  */
1708
1709       if (! multibyte && n_char_ranges > 0)
1710         {
1711           memset (fastmap + 0200, 0, 0200);
1712           for (i = 0; i < n_char_ranges; i += 2)
1713             {
1714               int c1 = char_ranges[i];
1715               int c2 = char_ranges[i + 1];
1716
1717               for (; c1 <= c2; c1++)
1718                 {
1719                   int b = CHAR_TO_BYTE_SAFE (c1);
1720                   if (b >= 0)
1721                     fastmap[b] = 1;
1722                 }
1723             }
1724         }
1725     }
1726
1727   /* If ^ was the first character, complement the fastmap.  */
1728   if (negate)
1729     {
1730       if (! multibyte)
1731         for (i = 0; i < sizeof fastmap; i++)
1732           fastmap[i] ^= 1;
1733       else
1734         {
1735           for (i = 0; i < 0200; i++)
1736             fastmap[i] ^= 1;
1737           /* All non-ASCII chars possibly match.  */
1738           for (; i < sizeof fastmap; i++)
1739             fastmap[i] = 1;
1740         }
1741     }
1742
1743   {
1744     EMACS_INT start_point = PT;
1745     EMACS_INT pos = PT;
1746     EMACS_INT pos_byte = PT_BYTE;
1747     unsigned char *p = PT_ADDR, *endp, *stop;
1748
1749     if (forwardp)
1750       {
1751         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1752         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1753       }
1754     else
1755       {
1756         endp = CHAR_POS_ADDR (XINT (lim));
1757         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1758       }
1759
1760     immediate_quit = 1;
1761     /* This code may look up syntax tables using macros that rely on the
1762        gl_state object.  To make sure this object is not out of date,
1763        let's initialize it manually.
1764        We ignore syntax-table text-properties for now, since that's
1765        what we've done in the past.  */
1766     SETUP_BUFFER_SYNTAX_TABLE ();
1767     if (forwardp)
1768       {
1769         if (multibyte)
1770           while (1)
1771             {
1772               int nbytes;
1773
1774               if (p >= stop)
1775                 {
1776                   if (p >= endp)
1777                     break;
1778                   p = GAP_END_ADDR;
1779                   stop = endp;
1780                 }
1781               c = STRING_CHAR_AND_LENGTH (p, nbytes);
1782               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1783                 {
1784                   if (negate)
1785                     break;
1786                   else
1787                     goto fwd_ok;
1788                 }
1789
1790               if (! fastmap[*p])
1791                 break;
1792               if (! ASCII_CHAR_P (c))
1793                 {
1794                   /* As we are looking at a multibyte character, we
1795                      must look up the character in the table
1796                      CHAR_RANGES.  If there's no data in the table,
1797                      that character is not what we want to skip.  */
1798
1799                   /* The following code does the right thing even if
1800                      n_char_ranges is zero (i.e. no data in
1801                      CHAR_RANGES).  */
1802                   for (i = 0; i < n_char_ranges; i += 2)
1803                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1804                       break;
1805                   if (!(negate ^ (i < n_char_ranges)))
1806                     break;
1807                 }
1808             fwd_ok:
1809               p += nbytes, pos++, pos_byte += nbytes;
1810             }
1811         else
1812           while (1)
1813             {
1814               if (p >= stop)
1815                 {
1816                   if (p >= endp)
1817                     break;
1818                   p = GAP_END_ADDR;
1819                   stop = endp;
1820                 }
1821
1822               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1823                 {
1824                   if (negate)
1825                     break;
1826                   else
1827                     goto fwd_unibyte_ok;
1828                 }
1829
1830               if (!fastmap[*p])
1831                 break;
1832             fwd_unibyte_ok:
1833               p++, pos++, pos_byte++;
1834             }
1835       }
1836     else
1837       {
1838         if (multibyte)
1839           while (1)
1840             {
1841               unsigned char *prev_p;
1842
1843               if (p <= stop)
1844                 {
1845                   if (p <= endp)
1846                     break;
1847                   p = GPT_ADDR;
1848                   stop = endp;
1849                 }
1850               prev_p = p;
1851               while (--p >= stop && ! CHAR_HEAD_P (*p));
1852               c = STRING_CHAR (p);
1853
1854               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1855                 {
1856                   if (negate)
1857                     break;
1858                   else
1859                     goto back_ok;
1860                 }
1861
1862               if (! fastmap[*p])
1863                 break;
1864               if (! ASCII_CHAR_P (c))
1865                 {
1866                   /* See the comment in the previous similar code.  */
1867                   for (i = 0; i < n_char_ranges; i += 2)
1868                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1869                       break;
1870                   if (!(negate ^ (i < n_char_ranges)))
1871                     break;
1872                 }
1873             back_ok:
1874               pos--, pos_byte -= prev_p - p;
1875             }
1876         else
1877           while (1)
1878             {
1879               if (p <= stop)
1880                 {
1881                   if (p <= endp)
1882                     break;
1883                   p = GPT_ADDR;
1884                   stop = endp;
1885                 }
1886
1887               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1888                 {
1889                   if (negate)
1890                     break;
1891                   else
1892                     goto back_unibyte_ok;
1893                 }
1894
1895               if (!fastmap[p[-1]])
1896                 break;
1897             back_unibyte_ok:
1898               p--, pos--, pos_byte--;
1899             }
1900       }
1901
1902     SET_PT_BOTH (pos, pos_byte);
1903     immediate_quit = 0;
1904
1905     return make_number (PT - start_point);
1906   }
1907 }
1908
1909 /* Move point across characters chosen by syntax class.  STRING holds
        syntax-class designator characters; a leading `^' selects the
        complement.  Scan forward if FORWARDP is nonzero, else backward,
        stopping at LIM at the latest (nil LIM means ZV or BEGV depending
        on the direction; other values are clamped to BEGV..ZV).  Point
        is left where the scan stops, and the value is the number of
        characters moved as a Lisp integer, negative when moving back.  */
1910 static Lisp_Object
1911 skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
1912 {
1913   register unsigned int c;
1914   unsigned char fastmap[0400];   /* Indexed by syntax code; nonzero means skip.  */
1915   int negate = 0;
1916   register EMACS_INT i, i_byte;
1917   int multibyte;
1918   EMACS_INT size_byte;
1919   unsigned char *str;
1920
1921   CHECK_STRING (string);
1922
1923   if (NILP (lim))
1924     XSETINT (lim, forwardp ? ZV : BEGV);
1925   else
1926     CHECK_NUMBER_COERCE_MARKER (lim);
1927
1928   /* In any case, don't allow scan outside bounds of buffer.  */
1929   if (XINT (lim) > ZV)
1930     XSETFASTINT (lim, ZV);
1931   if (XINT (lim) < BEGV)
1932     XSETFASTINT (lim, BEGV);
1933
       /* Point is already at or beyond LIM in the scan direction:
          nothing to skip.  */
1934   if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1935     return make_number (0);
1936
       /* Use the multibyte loops only when the buffer is multibyte and
          the span between point and LIM has a different length in bytes
          than in characters.  */
1937   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1938                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1939
1940   memset (fastmap, 0, sizeof fastmap);
1941
1942   if (SBYTES (string) > SCHARS (string))
1943     /* As this is a very rare case (syntax spec is ASCII only), don't
1944        consider efficiency.  */
1945     string = string_make_unibyte (string);
1946
1947   str = SDATA (string);
1948   size_byte = SBYTES (string);
1949
1950   i_byte = 0;
1951   if (i_byte < size_byte
1952       && SREF (string, 0) == '^')
1953     {
1954       negate = 1; i_byte++;
1955     }
1956
1957   /* Find the syntaxes specified and set their elements of fastmap.  */
1958
1959   while (i_byte < size_byte)
1960     {
1961       c = str[i_byte++];
1962       fastmap[syntax_spec_code[c]] = 1;
1963     }
1964
1965   /* If ^ was the first character, complement the fastmap.  */
1966   if (negate)
1967     for (i = 0; i < sizeof fastmap; i++)
1968       fastmap[i] ^= 1;
1969
1970   {
1971     EMACS_INT start_point = PT;
1972     EMACS_INT pos = PT;
1973     EMACS_INT pos_byte = PT_BYTE;
1974     unsigned char *p = PT_ADDR, *endp, *stop;
1975
         /* ENDP is the address that corresponds to LIM.  STOP is the end
            of the stretch of text that can be walked with plain pointer
            steps: the near edge of the gap when the gap lies between
            point and LIM, and ENDP otherwise.  */
1976     if (forwardp)
1977       {
1978         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1979         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1980       }
1981     else
1982       {
1983         endp = CHAR_POS_ADDR (XINT (lim));
1984         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1985       }
1986
1987     immediate_quit = 1;
1988     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1989     if (forwardp)
1990       {
1991         if (multibyte)
1992           {
1993             while (1)
1994               {
1995                 int nbytes;
1996
1997                 if (p >= stop)
1998                   {
1999                     if (p >= endp)
2000                       break;
                         /* Reached the gap, not LIM: continue on the
                            far side of the gap, now bounded by ENDP.  */
2001                     p = GAP_END_ADDR;
2002                     stop = endp;
2003                   }
2004                 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2005                 if (! fastmap[(int) SYNTAX (c)])
2006                   break;
2007                 p += nbytes, pos++, pos_byte += nbytes;
2008                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2009               }
2010           }
2011         else
2012           {
2013             while (1)
2014               {
2015                 if (p >= stop)
2016                   {
2017                     if (p >= endp)
2018                       break;
2019                     p = GAP_END_ADDR;   /* Continue past the gap.  */
2020                     stop = endp;
2021                   }
2022                 if (! fastmap[(int) SYNTAX (*p)])
2023                   break;
2024                 p++, pos++, pos_byte++;
2025                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2026               }
2027           }
2028       }
2029     else
2030       {
2031         if (multibyte)
2032           {
2033             while (1)
2034               {
2035                 unsigned char *prev_p;
2036
2037                 if (p <= stop)
2038                   {
2039                     if (p <= endp)
2040                       break;
2041                     p = GPT_ADDR;   /* Continue before the gap.  */
2042                     stop = endp;
2043                   }
2044                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2045                 prev_p = p;
                     /* Back up to the head byte of the preceding
                        character; PREV_P - P is then its byte length.  */
2046                 while (--p >= stop && ! CHAR_HEAD_P (*p));
2047                 c = STRING_CHAR (p);
2048                 if (! fastmap[(int) SYNTAX (c)])
2049                   break;
2050                 pos--, pos_byte -= prev_p - p;
2051               }
2052           }
2053         else
2054           {
2055             while (1)
2056               {
2057                 if (p <= stop)
2058                   {
2059                     if (p <= endp)
2060                       break;
2061                     p = GPT_ADDR;   /* Continue before the gap.  */
2062                     stop = endp;
2063                   }
2064                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2065                 if (! fastmap[(int) SYNTAX (p[-1])])
2066                   break;
2067                 p--, pos--, pos_byte--;
2068               }
2069           }
2070       }
2071
         /* POS and POS_BYTE, not P, record where the scan ended.  */
2072     SET_PT_BOTH (pos, pos_byte);
2073     immediate_quit = 0;
2074
2075     return make_number (PT - start_point);
2076   }
2077 }
2078
2079 /* Tell whether character C falls in at least one of the ISO character
2080    classes in the list ISO_CLASSES: return 1 if so, 0 if not.  Each list
2081    element is an integer, the class's type as given by re_wctype.  */
2082
2083 static int
2084 in_classes (int c, Lisp_Object iso_classes)
2085 {
2086   Lisp_Object tail;
2087   int matched = 0;
2088
2089   /* Every class is tested, even once a match has been found.  */
2090   for (tail = iso_classes; CONSP (tail); tail = XCDR (tail))
2091     {
2092       Lisp_Object class_type = XCAR (tail);
2093
2094       if (re_iswctype (c, XFASTINT (class_type)))
2095         matched = 1;
2096     }
2097
2098   return matched;
2099 }
2100 \f
2101 /* Jump over a comment, assuming we are at the beginning of one.
2102    FROM is the current position.
2103    FROM_BYTE is the bytepos corresponding to FROM.
2104    Do not move past STOP (a charpos).
2105    The comment over which we have to jump is of style STYLE
2106      (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2107    NESTING should be positive to indicate the nesting at the beginning
2108      for nested comments and should be zero or negative otherwise (any
2109      such value is treated as -1).  ST_COMMENT_STYLE cannot be nested.
2110    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2111      (or 0 if the scan cannot start in the middle of a two-character
2112      comment ender).
2113    If successful, return 1 and store the charpos of the comment's end
2114    into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR;
2115    *INCOMMENT_PTR is not written in that case.
2116    Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2117    corresponding bytepos into *BYTEPOS_PTR and the current nesting
2118    (as defined for state.incomment) in *INCOMMENT_PTR.
2119    The comment end is the last character of the comment rather than the
2120      character just after the comment.
2121
2122    Global syntax data is assumed to initially be valid for FROM and
2123    remains valid for forward search starting at the returned position. */
2124
2125 static int
2126 forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2127               int nesting, int style, int prev_syntax,
2128               EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2129               int *incomment_ptr)
2130 {
2131   register int c, c1;
2132   register enum syntaxcode code;
2133   register int syntax, other_syntax;
2134
       /* From here on, a negative NESTING means "not a nested comment".  */
2135   if (nesting <= 0) nesting = -1;
2136
2137   /* Enter the loop in the middle so that we find
2138      a 2-char comment ender if we start in the middle of it.  */
2139   syntax = prev_syntax;
2140   if (syntax != 0) goto forw_incomment;
2141
2142   while (1)
2143     {
           /* Ran into STOP before the comment ended: report failure
              together with the nesting level reached.  */
2144       if (from == stop)
2145         {
2146           *incomment_ptr = nesting;
2147           *charpos_ptr = from;
2148           *bytepos_ptr = from_byte;
2149           return 0;
2150         }
2151       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2152       syntax = SYNTAX_WITH_FLAGS (c);
2153       code = syntax & 0xff;   /* The low 8 bits are used as the syntax code.  */
2154       if (code == Sendcomment
2155           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2156           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2157               (nesting > 0 && --nesting == 0) : nesting < 0))
2158         /* We have encountered a single-character comment end of the
2159            same style as the comment sequence which began this comment
2160            section, and it closes the outermost level.  */
2161         break;
2162       if (code == Scomment_fence
2163           && style == ST_COMMENT_STYLE)
2164         /* We have encountered a comment fence while scanning a comment
2165            of ST_COMMENT_STYLE, so this fence ends the comment
2166            section.  */
2167         break;
2168       if (nesting > 0
2169           && code == Scomment
2170           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2171           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2172         /* We have encountered a nested comment of the same style
2173            as the comment sequence which began this comment section.  */
2174         nesting++;
2175       INC_BOTH (from, from_byte);
2176       UPDATE_SYNTAX_TABLE_FORWARD (from);
2177
2178     forw_incomment:
           /* SYNTAX describes the character just passed; test whether it
              and the character now at FROM form a two-character comment
              ender of the right style and nesting kind.  */
2179       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2180           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2181               other_syntax = SYNTAX_WITH_FLAGS (c1),
2182               SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2183           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2184           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2185                SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2186               ? nesting > 0 : nesting < 0))
2187         {
               /* In the non-nested case NESTING is negative here, so the
                  test below always succeeds.  */
2188           if (--nesting <= 0)
2189             /* We have encountered a comment end of the same style
2190                as the comment sequence which began this comment
2191                section.  */
2192             break;
2193           else
2194             {
2195               INC_BOTH (from, from_byte);
2196               UPDATE_SYNTAX_TABLE_FORWARD (from);
2197             }
2198         }
           /* Likewise, test for a two-character comment starter; this
              only matters while scanning a nested comment.  */
2199       if (nesting > 0
2200           && from < stop
2201           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2202           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2203               other_syntax = SYNTAX_WITH_FLAGS (c1),
2204               SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2205               && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2206           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2207               SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2208         /* We have encountered a nested comment of the same style
2209            as the comment sequence which began this comment
2210            section.  */
2211         {
2212           INC_BOTH (from, from_byte);
2213           UPDATE_SYNTAX_TABLE_FORWARD (from);
2214           nesting++;
2215         }
2216     }
       /* Success: FROM is at the last character of the comment ender.  */
2217   *charpos_ptr = from;
2218   *bytepos_ptr = from_byte;
2219   return 1;
2220 }
2221
2222 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2223        doc: /*
2224 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2225 Stop scanning if we find something other than a comment or whitespace.
2226 Set point to where scanning stops.
2227 If COUNT comments are found as expected, with nothing except whitespace
2228 between them, return t; otherwise return nil.  */)
2229   (Lisp_Object count)
2230 {
2231   register EMACS_INT from;
2232   EMACS_INT from_byte;
2233   register EMACS_INT stop;
2234   register int c, c1;
2235   register enum syntaxcode code;
2236   int comstyle = 0;         /* style of comment encountered */
2237   int comnested = 0;        /* whether the comment is nestable or not */
2238   int found;
2239   EMACS_INT count1;
2240   EMACS_INT out_charpos, out_bytepos;
2241   int dummy;                /* receives forw_comment's nesting; never read */
2242
2243   CHECK_NUMBER (count);
2244   count1 = XINT (count);
       /* The scan never leaves the accessible region: it is bounded by
          ZV when moving forward and by BEGV when moving backward.  */
2245   stop = count1 > 0 ? ZV : BEGV;
2246
2247   immediate_quit = 1;
2248   QUIT;
2249
2250   from = PT;
2251   from_byte = PT_BYTE;
2252
2253   SETUP_SYNTAX_TABLE (from, count1);
       /* Forward direction: each iteration skips whitespace and then one
          whole comment.  */
2254   while (count1 > 0)
2255     {
2256       do
2257         {
2258           int comstart_first, syntax, other_syntax;
2259
2260           if (from == stop)
2261             {
2262               SET_PT_BOTH (from, from_byte);
2263               immediate_quit = 0;
2264               return Qnil;
2265             }
2266           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2267           syntax = SYNTAX_WITH_FLAGS (c);
2268           code = SYNTAX (c);
2269           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2270           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2271           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2272           INC_BOTH (from, from_byte);
2273           UPDATE_SYNTAX_TABLE_FORWARD (from);
2274           if (from < stop && comstart_first
2275               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2276                   other_syntax = SYNTAX_WITH_FLAGS (c1),
2277                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2278             {
2279               /* We have encountered a comment start sequence and we
2280                  are ignoring all text inside comments.  We must record
2281                  the comment style this sequence begins so that later,
2282                  only a comment end of the same style actually ends
2283                  the comment section.  */
2284               code = Scomment;
2285               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2286               comnested
2287                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2288               INC_BOTH (from, from_byte);
2289               UPDATE_SYNTAX_TABLE_FORWARD (from);
2290             }
2291         }
           /* A newline with end-comment syntax is skipped like
              whitespace.  */
2292       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2293
2294       if (code == Scomment_fence)
2295         comstyle = ST_COMMENT_STYLE;
2296       else if (code != Scomment)
2297         {
               /* Neither whitespace nor a comment start: back up over
                  the character just read and leave point before it.  */
2298           immediate_quit = 0;
2299           DEC_BOTH (from, from_byte);
2300           SET_PT_BOTH (from, from_byte);
2301           return Qnil;
2302         }
2303       /* We're at the start of a comment.  */
2304       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2305                             &out_charpos, &out_bytepos, &dummy);
2306       from = out_charpos; from_byte = out_bytepos;
2307       if (!found)
2308         {
2309           immediate_quit = 0;
2310           SET_PT_BOTH (from, from_byte);
2311           return Qnil;
2312         }
           /* forw_comment stops on the last character of the comment;
              step past it.  */
2313       INC_BOTH (from, from_byte);
2314       UPDATE_SYNTAX_TABLE_FORWARD (from);
2315       /* We have skipped one comment.  */
2316       count1--;
2317     }
2318
       /* Backward direction: each iteration skips whitespace and then
          one whole comment, scanning toward STOP.  */
2319   while (count1 < 0)
2320     {
2321       while (1)
2322         {
2323           int quoted, syntax;
2324
2325           if (from <= stop)
2326             {
2327               SET_PT_BOTH (BEGV, BEGV_BYTE);
2328               immediate_quit = 0;
2329               return Qnil;
2330             }
2331
2332           DEC_BOTH (from, from_byte);
2333           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2334           quoted = char_quoted (from, from_byte);
2335           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2336           syntax = SYNTAX_WITH_FLAGS (c);
2337           code = SYNTAX (c);
2338           comstyle = 0;
2339           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2340           if (code == Sendcomment)
2341             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2342           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2343               && prev_char_comend_first (from, from_byte)
2344               && !char_quoted (from - 1, dec_bytepos (from_byte)))
2345             {
2346               int other_syntax;
2347               /* We must record the comment style encountered so that
2348                  later, we can match only the proper comment begin
2349                  sequence of the same style.  */
2350               DEC_BOTH (from, from_byte);
2351               code = Sendcomment;
2352               /* Calling char_quoted, above, set up global syntax position
2353                  at the new value of FROM.  */
2354               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2355               other_syntax = SYNTAX_WITH_FLAGS (c1);
2356               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2357               comnested
2358                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2359             }
2360
2361           if (code == Scomment_fence)
2362             {
2363               /* Skip until first preceding unquoted comment_fence.  */
2364               int fence_found = 0;
2365               EMACS_INT ini = from, ini_byte = from_byte;
2366
                   /* Only step back while there is text left before
                      FROM.  If the fence just found is itself at STOP,
                      the loop body must not run at all: its DEC_BOTH
                      would move FROM below STOP, after which the
                      FROM == STOP exit test could never succeed.  */
2367               while (from > stop)
2368                 {
2369                   DEC_BOTH (from, from_byte);
2370                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2371                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2372                   if (SYNTAX (c) == Scomment_fence
2373                       && !char_quoted (from, from_byte))
2374                     {
2375                       fence_found = 1;
2376                       break;
2377                     }
2378                   else if (from == stop)
2379                     break;
2380                 }
2381               if (fence_found == 0)
2382                 {
2383                   from = ini;           /* Set point to ini + 1.  */
2384                   from_byte = ini_byte;
2385                   goto leave;
2386                 }
2387               else
2388                 /* We have skipped one comment.  */
2389                 break;
2390             }
2391           else if (code == Sendcomment)
2392             {
2393               found = back_comment (from, from_byte, stop, comnested, comstyle,
2394                                     &out_charpos, &out_bytepos);
2395               if (found == -1)
2396                 {
2397                   if (c == '\n')
2398                     /* This end-of-line is not an end-of-comment.
2399                        Treat it like a whitespace.
2400                        CC-mode (and maybe others) relies on this behavior.  */
2401                     ;
2402                   else
2403                     {
2404                       /* Failure: we should go back to the end of this
2405                          not-quite-endcomment.  */
2406                       if (SYNTAX (c) != code)
2407                         /* It was a two-char Sendcomment.  */
2408                         INC_BOTH (from, from_byte);
2409                       goto leave;
2410                     }
2411                 }
2412               else
2413                 {
2414                   /* We have skipped one comment.  */
2415                   from = out_charpos, from_byte = out_bytepos;
2416                   break;
2417                 }
2418             }
2419           else if (code != Swhitespace || quoted)
2420             {
                   /* Common failure exit: FROM is on the character that
                      stopped the scan, so point goes just after it.  */
2421             leave:
2422               immediate_quit = 0;
2423               INC_BOTH (from, from_byte);
2424               SET_PT_BOTH (from, from_byte);
2425               return Qnil;
2426             }
2427         }
2428
2429       count1++;
2430     }
2431
2432   SET_PT_BOTH (from, from_byte);
2433   immediate_quit = 0;
2434   return Qt;
2435 }
2436 \f
2437 /* Return syntax code of character C if C is an ASCII character
2438    or `multibyte_symbol_p' is zero.  Otherwise, return Ssymbol.
        `multibyte_symbol_p' is not a macro argument: a variable of that
        name must be in scope wherever the macro is expanded.  C appears
        twice in the expansion, so it should have no side effects.  */
2439
2440 #define SYNTAX_WITH_MULTIBYTE_CHECK(c)          \
2441   ((ASCII_CHAR_P (c) || !multibyte_symbol_p)    \
2442    ? SYNTAX (c) : Ssymbol)
2443
2444 static Lisp_Object
2445 scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
2446 {
2447   Lisp_Object val;
2448   register EMACS_INT stop = count > 0 ? ZV : BEGV;
2449   register int c, c1;
2450   int stringterm;
2451   int quoted;
2452   int mathexit = 0;
2453   register enum syntaxcode code, temp_code;
2454   int min_depth = depth;    /* Err out if depth gets less than this.  */
2455   int comstyle = 0;         /* style of comment encountered */
2456   int comnested = 0;        /* whether the comment is nestable or not */
2457   EMACS_INT temp_pos;
2458   EMACS_INT last_good = from;
2459   int found;
2460   EMACS_INT from_byte;
2461   EMACS_INT out_bytepos, out_charpos;
2462   int temp, dummy;
2463   int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2464
2465   if (depth > 0) min_depth = 0;
2466
2467   if (from > ZV) from = ZV;
2468   if (from < BEGV) from = BEGV;
2469
2470   from_byte = CHAR_TO_BYTE (from);
2471
2472   immediate_quit = 1;
2473   QUIT;
2474
2475   SETUP_SYNTAX_TABLE (from, count);
2476   while (count > 0)
2477     {
2478       while (from < stop)
2479         {
2480           int comstart_first, prefix, syntax, other_syntax;
2481           UPDATE_SYNTAX_TABLE_FORWARD (from);
2482           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2483           syntax = SYNTAX_WITH_FLAGS (c);
2484           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2485           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2486           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2487           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2488           prefix = SYNTAX_FLAGS_PREFIX (syntax);
2489           if (depth == min_depth)
2490             last_good = from;
2491           INC_BOTH (from, from_byte);
2492           UPDATE_SYNTAX_TABLE_FORWARD (from);
2493           if (from < stop && comstart_first
2494               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2495                   other_syntax = SYNTAX_WITH_FLAGS (c),
2496                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2497               && parse_sexp_ignore_comments)
2498             {
2499               /* we have encountered a comment start sequence and we
2500                  are ignoring all text inside comments.  We must record
2501                  the comment style this sequence begins so that later,
2502                  only a comment end of the same style actually ends
2503                  the comment section */
2504               code = Scomment;
2505               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2506               comnested
2507                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2508               INC_BOTH (from, from_byte);
2509               UPDATE_SYNTAX_TABLE_FORWARD (from);
2510             }
2511
2512           if (prefix)
2513             continue;
2514
2515           switch (SWITCH_ENUM_CAST (code))
2516             {
2517             case Sescape:
2518             case Scharquote:
2519               if (from == stop)
2520                 goto lose;
2521               INC_BOTH (from, from_byte);
2522               /* treat following character as a word constituent */
2523             case Sword:
2524             case Ssymbol:
2525               if (depth || !sexpflag) break;
2526               /* This word counts as a sexp; return at end of it.  */
2527               while (from < stop)
2528                 {
2529                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2530
2531                   /* Some compilers can't handle this inside the switch.  */
2532                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2533                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2534                   switch (temp)
2535                     {
2536                     case Scharquote:
2537                     case Sescape:
2538                       INC_BOTH (from, from_byte);
2539                       if (from == stop)
2540                         goto lose;
2541                       break;
2542                     case Sword:
2543                     case Ssymbol:
2544                     case Squote:
2545                       break;
2546                     default:
2547                       goto done;
2548                     }
2549                   INC_BOTH (from, from_byte);
2550                 }
2551               goto done;
2552
2553             case Scomment_fence:
2554               comstyle = ST_COMMENT_STYLE;
2555               /* FALLTHROUGH */
2556             case Scomment:
2557               if (!parse_sexp_ignore_comments) break;
2558               UPDATE_SYNTAX_TABLE_FORWARD (from);
2559               found = forw_comment (from, from_byte, stop,
2560                                     comnested, comstyle, 0,
2561                                     &out_charpos, &out_bytepos, &dummy);
2562               from = out_charpos, from_byte = out_bytepos;
2563               if (!found)
2564                 {
2565                   if (depth == 0)
2566                     goto done;
2567                   goto lose;
2568                 }
2569               INC_BOTH (from, from_byte);
2570               UPDATE_SYNTAX_TABLE_FORWARD (from);
2571               break;
2572
2573             case Smath:
2574               if (!sexpflag)
2575                 break;
2576               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2577                 {
2578                   INC_BOTH (from, from_byte);
2579                 }
2580               if (mathexit)
2581                 {
2582                   mathexit = 0;
2583                   goto close1;
2584                 }
2585               mathexit = 1;
2586
2587             case Sopen:
2588               if (!++depth) goto done;
2589               break;
2590
2591             case Sclose:
2592             close1:
2593               if (!--depth) goto done;
2594               if (depth < min_depth)
2595                 xsignal3 (Qscan_error,
2596                           build_string ("Containing expression ends prematurely"),
2597                           make_number (last_good), make_number (from));
2598               break;
2599
2600             case Sstring:
2601             case Sstring_fence:
2602               temp_pos = dec_bytepos (from_byte);
2603               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2604               while (1)
2605                 {
2606                   if (from >= stop)
2607                     goto lose;
2608                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2609                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2610                   if (code == Sstring
2611                       ? (c == stringterm
2612                          && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2613                       : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2614                     break;
2615
2616                   /* Some compilers can't handle this inside the switch.  */
2617                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2618                   switch (temp)
2619                     {
2620                     case Scharquote:
2621                     case Sescape:
2622                       INC_BOTH (from, from_byte);
2623                     }
2624                   INC_BOTH (from, from_byte);
2625                 }
2626               INC_BOTH (from, from_byte);
2627               if (!depth && sexpflag) goto done;
2628               break;
2629             default:
2630               /* Ignore whitespace, punctuation, quote, endcomment.  */
2631               break;
2632             }
2633         }
2634
2635       /* Reached end of buffer.  Error if within object, return nil if between */
2636       if (depth)
2637         goto lose;
2638
2639       immediate_quit = 0;
2640       return Qnil;
2641
2642       /* End of object reached */
2643     done:
2644       count--;
2645     }
2646
2647   while (count < 0)
2648     {
2649       while (from > stop)
2650         {
2651           int syntax;
2652           DEC_BOTH (from, from_byte);
2653           UPDATE_SYNTAX_TABLE_BACKWARD (from);
2654           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2655           syntax= SYNTAX_WITH_FLAGS (c);
2656           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2657           if (depth == min_depth)
2658             last_good = from;
2659           comstyle = 0;
2660           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2661           if (code == Sendcomment)
2662             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2663           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2664               && prev_char_comend_first (from, from_byte)
2665               && parse_sexp_ignore_comments)
2666             {
2667               /* We must record the comment style encountered so that
2668                  later, we can match only the proper comment begin
2669                  sequence of the same style.  */
2670               int c2, other_syntax;
2671               DEC_BOTH (from, from_byte);
2672               UPDATE_SYNTAX_TABLE_BACKWARD (from);
2673               code = Sendcomment;
2674               c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2675               other_syntax = SYNTAX_WITH_FLAGS (c2);
2676               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2677               comnested
2678                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2679             }
2680
2681           /* Quoting turns anything except a comment-ender
2682              into a word character.  Note that this cannot be true
2683              if we decremented FROM in the if-statement above.  */
2684           if (code != Sendcomment && char_quoted (from, from_byte))
2685             {
2686               DEC_BOTH (from, from_byte);
2687               code = Sword;
2688             }
2689           else if (SYNTAX_FLAGS_PREFIX (syntax))
2690             continue;
2691
2692           switch (SWITCH_ENUM_CAST (code))
2693             {
2694             case Sword:
2695             case Ssymbol:
2696             case Sescape:
2697             case Scharquote:
2698               if (depth || !sexpflag) break;
2699               /* This word counts as a sexp; count object finished
2700                  after passing it.  */
2701               while (from > stop)
2702                 {
2703                   temp_pos = from_byte;
2704                   if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2705                     DEC_POS (temp_pos);
2706                   else
2707                     temp_pos--;
2708                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2709                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2710                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2711                   /* Don't allow comment-end to be quoted.  */
2712                   if (temp_code == Sendcomment)
2713                     goto done2;
2714                   quoted = char_quoted (from - 1, temp_pos);
2715                   if (quoted)
2716                     {
2717                       DEC_BOTH (from, from_byte);
2718                       temp_pos = dec_bytepos (temp_pos);
2719                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2720                     }
2721                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2722                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2723                   if (! (quoted || temp_code == Sword
2724                          || temp_code == Ssymbol
2725                          || temp_code == Squote))
2726                     goto done2;
2727                   DEC_BOTH (from, from_byte);
2728                 }
2729               goto done2;
2730
2731             case Smath:
2732               if (!sexpflag)
2733                 break;
2734               temp_pos = dec_bytepos (from_byte);
2735               UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2736               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2737                 DEC_BOTH (from, from_byte);
2738               if (mathexit)
2739                 {
2740                   mathexit = 0;
2741                   goto open2;
2742                 }
2743               mathexit = 1;
2744
2745             case Sclose:
2746               if (!++depth) goto done2;
2747               break;
2748
2749             case Sopen:
2750             open2:
2751               if (!--depth) goto done2;
2752               if (depth < min_depth)
2753                 xsignal3 (Qscan_error,
2754                           build_string ("Containing expression ends prematurely"),
2755                           make_number (last_good), make_number (from));
2756               break;
2757
2758             case Sendcomment:
2759               if (!parse_sexp_ignore_comments)
2760                 break;
2761               found = back_comment (from, from_byte, stop, comnested, comstyle,
2762                                     &out_charpos, &out_bytepos);
2763               /* FIXME:  if found == -1, then it really wasn't a comment-end.
2764                  For single-char Sendcomment, we can't do much about it apart
2765                  from skipping the char.
2766                  For 2-char endcomments, we could try again, taking both
2767                  chars as separate entities, but it's a lot of trouble
2768                  for very little gain, so we don't bother either.  -sm */
2769               if (found != -1)
2770                 from = out_charpos, from_byte = out_bytepos;
2771               break;
2772
2773             case Scomment_fence:
2774             case Sstring_fence:
2775               while (1)
2776                 {
2777                   if (from == stop)
2778                     goto lose;
2779                   DEC_BOTH (from, from_byte);
2780                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2781                   if (!char_quoted (from, from_byte)
2782                       && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2783                           SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2784                     break;
2785                 }
2786               if (code == Sstring_fence && !depth && sexpflag) goto done2;
2787               break;
2788
2789             case Sstring:
2790               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2791               while (1)
2792                 {
2793                   if (from == stop)
2794                     goto lose;
2795                   DEC_BOTH (from, from_byte);
2796                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2797                   if (!char_quoted (from, from_byte)
2798                       && (stringterm
2799                           == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2800                       && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2801                     break;
2802                 }
2803               if (!depth && sexpflag) goto done2;
2804               break;
2805             default:
2806               /* Ignore whitespace, punctuation, quote, endcomment.  */
2807               break;
2808             }
2809         }
2810
2811       /* Reached start of buffer.  Error if within object, return nil if between */
2812       if (depth)
2813         goto lose;
2814
2815       immediate_quit = 0;
2816       return Qnil;
2817
2818     done2:
2819       count++;
2820     }
2821
2822
2823   immediate_quit = 0;
2824   XSETFASTINT (val, from);
2825   return val;
2826
2827  lose:
2828   xsignal3 (Qscan_error,
2829             build_string ("Unbalanced parentheses"),
2830             make_number (last_good), make_number (from));
2831 }
2832
2833 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2834        doc: /* Scan from character number FROM by COUNT lists.
2835 Returns the character number of the position thus found.
2836
2837 If DEPTH is nonzero, paren depth begins counting from that value,
2838 only places where the depth in parentheses becomes zero
2839 are candidates for stopping; COUNT such places are counted.
2840 Thus, a positive value for DEPTH means go out levels.
2841
2842 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2843
2844 If the beginning or end of (the accessible part of) the buffer is reached
2845 and the depth is wrong, an error is signaled.
2846 If the depth is right but the count is not used up, nil is returned.  */)
2847   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2848 {
       /* Check all three arguments before XINT unboxes them below.  */
2849   CHECK_NUMBER (from);
2850   CHECK_NUMBER (count);
2851   CHECK_NUMBER (depth);
2852
       /* All the work is done by scan_lists; a negative COUNT is handled
          by its `while (count < 0)' backward loop.  The trailing 0 is
          presumably scan_lists' SEXPFLAG (its parameter list is not
          visible here -- confirm): with that flag clear, words, symbols
          and strings are stepped over and only the paren depth reaching
          zero completes a scan.  */
2853   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2854 }
2855
2856 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2857        doc: /* Scan from character number FROM by COUNT balanced expressions.
2858 If COUNT is negative, scan backwards.
2859 Returns the character number of the position thus found.
2860
2861 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2862
2863 If the beginning or end of (the accessible part of) the buffer is reached
2864 in the middle of a parenthetical grouping, an error is signaled.
2865 If the beginning or end is reached between groupings
2866 but before count is used up, nil is returned.  */)
2867   (Lisp_Object from, Lisp_Object count)
2868 {
       /* Check both arguments before XINT unboxes them below.  */
2869   CHECK_NUMBER (from);
2870   CHECK_NUMBER (count);
2871
       /* Same scanner as `scan-lists', started at depth 0.  The trailing 1
          is presumably scan_lists' SEXPFLAG (its parameter list is not
          visible here -- confirm): with that flag set, a word, symbol or
          string met at depth zero counts as one complete expression.  */
2872   return scan_lists (XINT (from), XINT (count), 0, 1);
2873 }
2874
2875 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2876        0, 0, 0,
2877        doc: /* Move point backward over any number of chars with prefix syntax.
2878 This includes chars with "quote" or "prefix" syntax (' or p).  */)
2879   (void)
2880 {
2881   EMACS_INT beg = BEGV;
       /* OPOINT / OPOINT_BYTE track the earliest position known to start
          the run of prefix chars; point is moved there at the end.  */
2882   EMACS_INT opoint = PT;
2883   EMACS_INT opoint_byte = PT_BYTE;
       /* POS / POS_BYTE is the character currently being examined.  */
2884   EMACS_INT pos = PT;
2885   EMACS_INT pos_byte = PT_BYTE;
2886   int c;
2887
       /* Nothing precedes point inside the accessible region.  */
2888   if (pos <= beg)
2889     {
2890       SET_PT_BOTH (opoint, opoint_byte);
2891
2892       return Qnil;
2893     }
2894
2895   SETUP_SYNTAX_TABLE (pos, -1);
2896
2897   DEC_BOTH (pos, pos_byte);
2898
       /* Walk backward while the character at POS is unquoted and has
          either quote syntax or the prefix flag.  */
2899   while (!char_quoted (pos, pos_byte)
2900          /* Previous statement updates syntax table.  */
2901          && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2902              || SYNTAX_PREFIX (c)))
2903     {
2904       opoint = pos;
2905       opoint_byte = pos_byte;
2906
           /* Stop once the first accessible character has been accepted.
              The former test `pos + 1 > beg' was always true here, so POS
              could be decremented to BEG - 1 and the loop condition would
              then examine a character outside the accessible region.  */
2907       if (pos <= beg) break;
2908       DEC_BOTH (pos, pos_byte);
2909     }
2910
2911   SET_PT_BOTH (opoint, opoint_byte);
2912
2913   return Qnil;
2914 }
2915 \f
2916 /* Parse forward from FROM / FROM_BYTE to END,
2917    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2918    and return a description of the state of the parse at END.
2919    If STOPBEFORE is nonzero, stop at the start of an atom.
2920    If COMMENTSTOP is 1, stop at the start of a comment.
2921    If COMMENTSTOP is -1, stop at the start or end of a comment,
2922    after the beginning of a string, or after the end of a string.  */
     /* The description is stored in *STATEPTR; nothing is returned.
        Scanning also stops as soon as an open or close paren makes the
        depth equal to TARGETDEPTH.
        OLDSTATE, when non-nil, is a list laid out like the value of
        `parse-partial-sexp'; elements 0, 3, 4, 5, 7, 8 and 9 of it are
        read below to seed the scan.  */
2923
2924 static void
2925 scan_sexps_forward (struct lisp_parse_state *stateptr,
2926                     EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2927                     int targetdepth, int stopbefore,
2928                     Lisp_Object oldstate, int commentstop)
2929 {
2930   struct lisp_parse_state state;
2931
2932   register enum syntaxcode code;
2933   int c1;
2934   int comnested;
       /* One entry per open paren level.  LAST is where the most recently
          started sexp at that level begins; PREV is where the most recently
          completed one begins.  Both are -1 when there is none.  */
2935   struct level { int last, prev; };
       /* Fixed-size stack of levels.  When a push would run past the end,
          CURLEVEL is backed up again, so deeper nesting reuses the top
          entry instead of signaling an error.  */
2936   struct level levelstart[100];
2937   register struct level *curlevel = levelstart;
2938   struct level *endlevel = levelstart + 100;
2939   register int depth;   /* Paren depth of current scanning location.
2940                            level - levelstart equals this except
2941                            when the depth becomes negative.  */
2942   int mindepth;         /* Lowest DEPTH value seen.  */
2943   int start_quoted = 0;         /* Nonzero means starting after a char quote */
2944   Lisp_Object tem;
2945   EMACS_INT prev_from;          /* Keep one character before FROM.  */
2946   EMACS_INT prev_from_byte;
2947   int prev_from_syntax;         /* SYNTAX_WITH_FLAGS of the char at PREV_FROM.  */
2948   int boundary_stop = commentstop == -1;   /* Also stop at string/comment boundaries.  */
2949   int nofence;                  /* Nonzero if the string ends at its own
                                        terminator char rather than at a
                                        string fence.  */
2950   int found;                    /* Value returned by forw_comment.  */
2951   EMACS_INT out_bytepos, out_charpos;
2952   int temp;
2953
       /* PREV_FROM starts one character before FROM, unless FROM is
          already at BEGV.  */
2954   prev_from = from;
2955   prev_from_byte = from_byte;
2956   if (from != BEGV)
2957     DEC_BOTH (prev_from, prev_from_byte);
2958
       /* INC_FROM remembers the character being stepped over in
          PREV_FROM / PREV_FROM_BYTE, caches its syntax flags in
          PREV_FROM_SYNTAX, and then advances FROM / FROM_BYTE by one
          character.  It clobbers TEMP.  */
2959   /* Use this macro instead of `from++'.  */
2960 #define INC_FROM                                \
2961 do { prev_from = from;                          \
2962      prev_from_byte = from_byte;                \
2963      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
2964      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2965      INC_BOTH (from, from_byte);                \
2966      if (from < end)                            \
2967        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
2968   } while (0)
2969
       /* immediate_quit is reset to 0 at `done' below.
          NOTE(review): presumably this makes the scan interruptible
          while it runs -- confirm.  */
2970   immediate_quit = 1;
2971   QUIT;
2972
2973   if (NILP (oldstate))
2974     {
           /* Fresh parse: outside any list, string or comment.  */
2975       depth = 0;
2976       state.instring = -1;
2977       state.incomment = 0;
2978       state.comstyle = 0;       /* comment style a by default.  */
2979       state.comstr_start = -1;  /* no comment/string seen.  */
2980     }
2981   else
2982     {
           /* Element 0: paren depth.
              NOTE(review): XINT is applied here (and to elements 8 and 9
              below) without first checking that the value is an
              integer -- confirm callers guarantee that.  */
2983       tem = Fcar (oldstate);
2984       if (!NILP (tem))
2985         depth = XINT (tem);
2986       else
2987         depth = 0;
2988
           /* Skip elements 1 and 2; element 3 tells whether we start
              inside a string.  */
2989       oldstate = Fcdr (oldstate);
2990       oldstate = Fcdr (oldstate);
2991       oldstate = Fcdr (oldstate);
2992       tem = Fcar (oldstate);
2993       /* Check whether we are inside string_fence-style string: */
2994       state.instring = (!NILP (tem)
2995                         ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2996                         : -1);
2997
           /* Element 4: nil -> 0, an integer -> that nesting count,
              any other value -> -1.  */
2998       oldstate = Fcdr (oldstate);
2999       tem = Fcar (oldstate);
3000       state.incomment = (!NILP (tem)
3001                          ? (INTEGERP (tem) ? XINT (tem) : -1)
3002                          : 0);
3003
           /* Element 5: non-nil means the previous parse stopped just
              after a quoting character.  */
3004       oldstate = Fcdr (oldstate);
3005       tem = Fcar (oldstate);
3006       start_quoted = !NILP (tem);
3007
3008       /* if the eighth element of the list is nil, we are in comment
3009          style a.  If it is non-nil, we are in comment style b */
           /* That is element 7 counting from zero; element 6 is skipped.
              Besides nil, the symbol `syntax-table' selects
              ST_COMMENT_STYLE, an integer is taken as the style itself,
              and anything else means style 1.  */
3010       oldstate = Fcdr (oldstate);
3011       oldstate = Fcdr (oldstate);
3012       tem = Fcar (oldstate);
3013       state.comstyle = (NILP (tem)
3014                         ? 0
3015                         : (EQ (tem, Qsyntax_table)
3016                            ? ST_COMMENT_STYLE
3017                            : INTEGERP (tem) ? XINT (tem) : 1));
3018
           /* Element 8: start of the enclosing comment or string.  */
3019       oldstate = Fcdr (oldstate);
3020       tem = Fcar (oldstate);
3021       state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
           /* Element 9: list of positions; each one is recorded as the
              LAST of a level and pushes a new, empty level on top.  */
3022       oldstate = Fcdr (oldstate);
3023       tem = Fcar (oldstate);
3024       while (!NILP (tem))               /* >= second enclosing sexps.  */
3025         {
3026           /* curlevel++->last ran into compiler bug on Apollo */
3027           curlevel->last = XINT (Fcar (tem));
3028           if (++curlevel == endlevel)
3029             curlevel--; /* error ("Nesting too deep for parser"); */
3030           curlevel->prev = -1;
3031           curlevel->last = -1;
3032           tem = Fcdr (tem);
3033         }
3034     }
3035   state.quoted = 0;
3036   mindepth = depth;
3037
3038   curlevel->prev = -1;
3039   curlevel->last = -1;
3040
       /* Prime PREV_FROM_SYNTAX with the flags of the character before
          FROM.
          NOTE(review): this fetch, and the one in the two-character
          comment-start test in the main loop, use FETCH_CHAR while the
          rest of the function uses FETCH_CHAR_AS_MULTIBYTE -- confirm
          the difference is intended.  */
3041   SETUP_SYNTAX_TABLE (prev_from, 1);
3042   temp = FETCH_CHAR (prev_from_byte);
3043   prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3044   UPDATE_SYNTAX_TABLE_FORWARD (from);
3045
3046   /* Enter the loop at a place appropriate for initial state.  */
       /* The labels jumped to here are inside the body of the main
          `while' loop below, so control continues with that loop once
          the pending comment, string or quoted character is dealt with.  */
3047
3048   if (state.incomment)
3049     goto startincomment;
3050   if (state.instring >= 0)
3051     {
3052       nofence = state.instring != ST_STRING_STYLE;
3053       if (start_quoted)
3054         goto startquotedinstring;
3055       goto startinstring;
3056     }
3057   else if (start_quoted)
3058     goto startquoted;
3059
3060   while (from < end)
3061     {
3062       int syntax;
           /* Consume one character; its syntax class is the low byte of
              the flags word that INC_FROM cached.  */
3063       INC_FROM;
3064       code = prev_from_syntax & 0xff;
3065
           /* A character flagged as first of a comment starter, followed
              by one flagged as second, opens a two-character comment.  */
3066       if (from < end
3067           && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3068           && (c1 = FETCH_CHAR (from_byte),
3069               syntax = SYNTAX_WITH_FLAGS (c1),
3070               SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3071         /* Duplicate code to avoid a complex if-expression
3072            which causes trouble for the SGI compiler.  */
3073         {
3074           /* Record the comment style we have entered so that only
3075              the comment-end sequence of the same style actually
3076              terminates the comment section.  */
3077           state.comstyle
3078             = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3079           comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3080           comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
               /* 1 is the initial nesting count of a nestable comment;
                  -1 marks a non-nestable one.  */
3081           state.incomment = comnested ? 1 : -1;
               /* PREV_FROM is still the first character of the starter.  */
3082           state.comstr_start = prev_from;
               /* Step over the second character of the starter.  */
3083           INC_FROM;
3084           code = Scomment;
3085         }
3086       else if (code == Scomment_fence)
3087         {
3088           /* Record the comment style we have entered so that only
3089              the comment-end sequence of the same style actually
3090              terminates the comment section.  */
3091           state.comstyle = ST_COMMENT_STYLE;
3092           state.incomment = -1;
3093           state.comstr_start = prev_from;
3094           code = Scomment;
3095         }
3096       else if (code == Scomment)
3097         {
               /* Single-character comment starter.  */
3098           state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3099           state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3100                              1 : -1);
3101           state.comstr_start = prev_from;
3102         }
3103
           /* A character carrying the prefix flag is skipped without
              being dispatched on.  */
3104       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3105         continue;
3106       switch (SWITCH_ENUM_CAST (code))
3107         {
3108         case Sescape:
3109         case Scharquote:
3110           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3111           curlevel->last = prev_from;
3112         startquoted:
               /* A quoting character at the very end of the region: report
                  it through state.quoted.  */
3113           if (from == end) goto endquoted;
3114           INC_FROM;
3115           goto symstarted;
3116           /* treat following character as a word constituent */
3117         case Sword:
3118         case Ssymbol:
3119           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3120           curlevel->last = prev_from;
3121         symstarted:
               /* Swallow the rest of the symbol: word, symbol and quote
                  characters, plus any character following an escape or
                  char-quote.  */
3122           while (from < end)
3123             {
3124               /* Some compilers can't handle this inside the switch.  */
3125               temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3126               temp = SYNTAX (temp);
3127               switch (temp)
3128                 {
3129                 case Scharquote:
3130                 case Sescape:
3131                   INC_FROM;
3132                   if (from == end) goto endquoted;
3133                   break;
3134                 case Sword:
3135                 case Ssymbol:
3136                 case Squote:
3137                   break;
3138                 default:
3139                   goto symdone;
3140                 }
3141               INC_FROM;
3142             }
3143         symdone:
               /* The symbol is complete, so it becomes the last complete
                  sexp of this level.  */
3144           curlevel->prev = curlevel->last;
3145           break;
3146
3147         case Scomment_fence: /* Can't happen because it's handled above.  */
3148         case Scomment:
3149           if (commentstop || boundary_stop) goto done;
3150         startincomment:
3151           /* The (from == BEGV) test was to enter the loop in the middle so
3152              that we find a 2-char comment ender even if we start in the
3153              middle of it.  We don't want to do that if we're just at the
3154              beginning of the comment (think of (*) ... (*)).  */
3155           found = forw_comment (from, from_byte, end,
3156                                 state.incomment, state.comstyle,
3157                                 (from == BEGV || from < state.comstr_start + 3)
3158                                 ? 0 : prev_from_syntax,
3159                                 &out_charpos, &out_bytepos, &state.incomment);
3160           from = out_charpos; from_byte = out_bytepos;
3161           /* Beware!  prev_from and friends are invalid now.
3162              Luckily, the `done' doesn't use them and the INC_FROM
3163              sets them to a sane value without looking at them. */
               /* Not found: finish with state.incomment as forw_comment
                  left it.  */
3164           if (!found) goto done;
3165           INC_FROM;
3166           state.incomment = 0;
3167           state.comstyle = 0;   /* reset the comment style */
3168           if (boundary_stop) goto done;
3169           break;
3170
3171         case Sopen:
3172           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3173           depth++;
3174           /* curlevel++->last ran into compiler bug on Apollo */
               /* Record where this list starts, then push a fresh level
                  for its contents.  */
3175           curlevel->last = prev_from;
3176           if (++curlevel == endlevel)
3177             curlevel--; /* error ("Nesting too deep for parser"); */
3178           curlevel->prev = -1;
3179           curlevel->last = -1;
3180           if (targetdepth == depth) goto done;
3181           break;
3182
3183         case Sclose:
3184           depth--;
3185           if (depth < mindepth)
3186             mindepth = depth;
               /* Pop back to the enclosing level (staying put when already
                  at the bottom); the list just closed is now that level's
                  last complete sexp.  */
3187           if (curlevel != levelstart)
3188             curlevel--;
3189           curlevel->prev = curlevel->last;
3190           if (targetdepth == depth) goto done;
3191           break;
3192
3193         case Sstring:
3194         case Sstring_fence:
3195           state.comstr_start = from - 1;
3196           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3197           curlevel->last = prev_from;
               /* For an ordinary string remember the opening character as
                  the terminator to look for; ST_STRING_STYLE marks a
                  string opened by a string fence.  */
3198           state.instring = (code == Sstring
3199                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3200                             : ST_STRING_STYLE);
3201           if (boundary_stop) goto done;
3202         startinstring:
3203           {
3204             nofence = state.instring != ST_STRING_STYLE;
3205
3206             while (1)
3207               {
3208                 int c;
3209
                     /* Region exhausted while still inside the string:
                        finish with state.instring still set.  */
3210                 if (from >= end) goto done;
3211                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3212                 /* Some compilers can't handle this inside the switch.  */
3213                 temp = SYNTAX (c);
3214
3215                 /* Check TEMP here so that if the char has
3216                    a syntax-table property which says it is NOT
3217                    a string character, it does not end the string.  */
3218                 if (nofence && c == state.instring && temp == Sstring)
3219                   break;
3220
3221                 switch (temp)
3222                   {
3223                   case Sstring_fence:
                         /* A fence ends only a fence-delimited string.  */
3224                     if (!nofence) goto string_end;
3225                     break;
3226                   case Scharquote:
3227                   case Sescape:
                         /* Step over the quoting character here; the
                            INC_FROM after the switch then steps over the
                            character it quotes.  */
3228                     INC_FROM;
3229                   startquotedinstring:
3230                     if (from >= end) goto endquoted;
3231                   }
3232                 INC_FROM;
3233               }
3234           }
3235         string_end:
               /* Step over the terminator and mark the string as this
                  level's last complete sexp.  */
3236           state.instring = -1;
3237           curlevel->prev = curlevel->last;
3238           INC_FROM;
3239           if (boundary_stop) goto done;
3240           break;
3241
3242         case Smath:
3243           /* FIXME: We should do something with it.  */
3244           break;
3245         default:
3246           /* Ignore whitespace, punctuation, quote, endcomment.  */
3247           break;
3248         }
3249     }
3250   goto done;
3251
3252  stop:   /* Here if stopping before start of sexp. */
3253   from = prev_from;    /* We have just fetched the char that starts it; */
3254   goto done; /* but return the position before it. */
3255
3256  endquoted:
3257   state.quoted = 1;
3258  done:
       /* Fill in the rest of STATE from the local scan variables.  */
3259   state.depth = depth;
3260   state.mindepth = mindepth;
3261   state.thislevelstart = curlevel->prev;
3262   state.prevlevelstart
3263     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3264   state.location = from;
3265   state.levelstarts = Qnil;
       /* Cons from the innermost enclosing level outward, so the finished
          list has the outermost level's start first.  */
3266   while (curlevel > levelstart)
3267     state.levelstarts = Fcons (make_number ((--curlevel)->last),
3268                                state.levelstarts);
3269   immediate_quit = 0;
3270
3271   *stateptr = state;
3272 }
3273
3274 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3275        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3276 Parsing stops at TO or when certain criteria are met;
3277  point is set to where parsing stops.
3278 If fifth arg OLDSTATE is omitted or nil,
3279  parsing assumes that FROM is the beginning of a function.
3280 Value is a list of elements describing final state of parsing:
3281  0. depth in parens.
3282  1. character address of start of innermost containing list; nil if none.
3283  2. character address of start of last complete sexp terminated.
3284  3. non-nil if inside a string.
3285     (it is the character that will terminate the string,
3286      or t if the string should be terminated by a generic string delimiter.)
3287  4. nil if outside a comment, t if inside a non-nestable comment,
3288     else an integer (the current comment nesting).
3289  5. t if following a quote character.
3290  6. the minimum paren-depth encountered during this scan.
3291  7. style of comment, if any.
3292  8. character address of start of comment or string; nil if not in one.
3293  9. Intermediate data for continuation of parsing (subject to change).
3294 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3295 in parentheses becomes equal to TARGETDEPTH.
3296 Fourth arg STOPBEFORE non-nil means stop when come to
3297  any character that starts a sexp.
3298 Fifth arg OLDSTATE is a list like what this function returns.
3299  It is used to initialize the state of the parse.  Elements number 1, 2
3300  and 6 are ignored.
3301 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3302  If it is symbol `syntax-table', stop after the start of a comment or a
3303  string, or after end of a comment or a string.  */)
3304   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3305 {
3306   struct lisp_parse_state state;
3307   int target;
3308
3309   if (!NILP (targetdepth))
3310     {
3311       CHECK_NUMBER (targetdepth);
3312       target = XINT (targetdepth);
3313     }
3314   else
3315     target = -100000;           /* We won't reach this depth */
3316
       /* NOTE(review): validate_region presumably checks FROM and TO and
          puts them in order; its definition is not visible here.  */
3317   validate_region (&from, &to);
       /* COMMENTSTOP is passed on as 0 for nil, -1 for the symbol
          `syntax-table', and 1 for any other value.  */
3318   scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3319                       XINT (to),
3320                       target, !NILP (stopbefore), oldstate,
3321                       (NILP (commentstop)
3322                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3323
       /* Leave point where the scan stopped.  */
3324   SET_PT (state.location);
3325
       /* Build the ten-element result, in the order listed in the doc
          string.  Negative level starts and a negative instring become
          nil; a comment style of 0 becomes nil and ST_COMMENT_STYLE
          becomes the symbol `syntax-table'.  */
3326   return Fcons (make_number (state.depth),
3327            Fcons (state.prevlevelstart < 0
3328                   ? Qnil : make_number (state.prevlevelstart),
3329              Fcons (state.thislevelstart < 0
3330                     ? Qnil : make_number (state.thislevelstart),
3331                Fcons (state.instring >= 0
3332                       ? (state.instring == ST_STRING_STYLE
3333                          ? Qt : make_number (state.instring)) : Qnil,
3334                  Fcons (state.incomment < 0 ? Qt :
3335                         (state.incomment == 0 ? Qnil :
3336                          make_number (state.incomment)),
3337                    Fcons (state.quoted ? Qt : Qnil,
3338                      Fcons (make_number (state.mindepth),
3339                        Fcons ((state.comstyle
3340                                ? (state.comstyle == ST_COMMENT_STYLE
3341                                   ? Qsyntax_table
3342                                   : make_number (state.comstyle))
3343                                : Qnil),
3344                               Fcons (((state.incomment
3345                                        || (state.instring >= 0))
3346                                       ? make_number (state.comstr_start)
3347                                       : Qnil),
3348                                      Fcons (state.levelstarts, Qnil))))))))));
3349 }
3350 \f
3351 void
3352 init_syntax_once (void)
3353 {
3354   register int i, c;
3355   Lisp_Object temp;
3356
3357   /* This has to be done here, before we call Fmake_char_table.  */
3358   Qsyntax_table = intern_c_string ("syntax-table");
3359   staticpro (&Qsyntax_table);
3360
3361   /* Intern_C_String this now in case it isn't already done.
3362      Setting this variable twice is harmless.
3363      But don't staticpro it here--that is done in alloc.c.  */
3364   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3365
3366   /* Create objects which can be shared among syntax tables.  */
3367   Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3368   for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3369     XVECTOR (Vsyntax_code_object)->contents[i]
3370       = Fcons (make_number (i), Qnil);
3371
3372   /* Now we are ready to set up this property, so we can
3373      create syntax tables.  */
3374   Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3375
3376   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3377
3378   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3379
3380   /* Control characters should not be whitespace.  */
3381   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3382   for (i = 0; i <= ' ' - 1; i++)
3383     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3384   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3385
3386   /* Except that a few really are whitespace.  */
3387   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3388   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3389   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3390   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3391   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3392   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3393
3394   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3395   for (i = 'a'; i <= 'z'; i++)
3396     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3397   for (i = 'A'; i <= 'Z'; i++)
3398     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3399   for (i = '0'; i <= '9'; i++)
3400     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3401
3402   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3403   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3404
3405   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3406                         Fcons (make_number (Sopen), make_number (')')));
3407   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3408                         Fcons (make_number (Sclose), make_number ('(')));
3409   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3410                         Fcons (make_number (Sopen), make_number (']')));
3411   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3412                         Fcons (make_number (Sclose), make_number ('[')));
3413   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3414                         Fcons (make_number (Sopen), make_number ('}')));
3415   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3416                         Fcons (make_number (Sclose), make_number ('{')));
3417   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3418                         Fcons (make_number ((int) Sstring), Qnil));
3419   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3420                         Fcons (make_number ((int) Sescape), Qnil));
3421
3422   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3423   for (i = 0; i < 10; i++)
3424     {
3425       c = "_-+*/&|<>="[i];
3426       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3427     }
3428
3429   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3430   for (i = 0; i < 12; i++)
3431     {
3432       c = ".,;:?!#@~^'`"[i];
3433       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3434     }
3435
3436   /* All multibyte characters have syntax `word' by default.  */
3437   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3438   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3439 }
3440
3441 void
3442 syms_of_syntax (void)
3443 {
3444   Qsyntax_table_p = intern_c_string ("syntax-table-p");
3445   staticpro (&Qsyntax_table_p);
3446
3447   staticpro (&Vsyntax_code_object);
3448
3449   staticpro (&gl_state.object);
3450   staticpro (&gl_state.global_code);
3451   staticpro (&gl_state.current_syntax_table);
3452   staticpro (&gl_state.old_prop);
3453
3454   /* Defined in regex.c */
3455   staticpro (&re_match_object);
3456
3457   Qscan_error = intern_c_string ("scan-error");
3458   staticpro (&Qscan_error);
3459   Fput (Qscan_error, Qerror_conditions,
3460         pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3461   Fput (Qscan_error, Qerror_message,
3462         make_pure_c_string ("Scan error"));
3463
3464   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3465                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3466
3467   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3468                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3469 Otherwise, that text property is simply ignored.
3470 See the info node `(elisp)Syntax Properties' for a description of the
3471 `syntax-table' property.  */);
3472
3473   words_include_escapes = 0;
3474   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3475                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3476
3477   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3478                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3479   multibyte_syntax_as_symbol = 0;
3480
3481   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3482                open_paren_in_column_0_is_defun_start,
3483                doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3484   open_paren_in_column_0_is_defun_start = 1;
3485
3486
3487   DEFVAR_LISP ("find-word-boundary-function-table",
3488                Vfind_word_boundary_function_table,
3489                doc: /*
3490 Char table of functions to search for the word boundary.
3491 Each function is called with two arguments; POS and LIMIT.
3492 POS and LIMIT are character positions in the current buffer.
3493
3494 If POS is less than LIMIT, POS is at the first character of a word,
3495 and the return value of a function is a position after the last
3496 character of that word.
3497
3498 If POS is not less than LIMIT, POS is at the last character of a word,
3499 and the return value of a function is a position at the first
3500 character of that word.
3501
3502 In both cases, LIMIT bounds the search. */);
3503   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3504
3505   defsubr (&Ssyntax_table_p);
3506   defsubr (&Ssyntax_table);
3507   defsubr (&Sstandard_syntax_table);
3508   defsubr (&Scopy_syntax_table);
3509   defsubr (&Sset_syntax_table);
3510   defsubr (&Schar_syntax);
3511   defsubr (&Smatching_paren);
3512   defsubr (&Sstring_to_syntax);
3513   defsubr (&Smodify_syntax_entry);
3514   defsubr (&Sinternal_describe_syntax_value);
3515
3516   defsubr (&Sforward_word);
3517
3518   defsubr (&Sskip_chars_forward);
3519   defsubr (&Sskip_chars_backward);
3520   defsubr (&Sskip_syntax_forward);
3521   defsubr (&Sskip_syntax_backward);
3522
3523   defsubr (&Sforward_comment);
3524   defsubr (&Sscan_lists);
3525   defsubr (&Sscan_sexps);
3526   defsubr (&Sbackward_prefix_chars);
3527   defsubr (&Sparse_partial_sexp);
3528 }