src/syntax.c

   1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
   2    Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2011
   3                  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20
  21 #include <config.h>
  22 #include <ctype.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "commands.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "keymap.h"
  29 #include "regex.h"
  30
  31 /* Make syntax table lookup grant data in gl_state.  */
  32 #define SYNTAX_ENTRY_VIA_PROPERTY
  33
  34 #include "syntax.h"
  35 #include "intervals.h"
  36 #include "category.h"
  37
  38 /* Then there are eight single-bit flags that have the following meanings:
  39   1. This character is the first of a two-character comment-start sequence.
  40   2. This character is the second of a two-character comment-start sequence.
  41   3. This character is the first of a two-character comment-end sequence.
  42   4. This character is the second of a two-character comment-end sequence.
  43   5. This character is a prefix, for backward-prefix-chars.
  44   6. The char is part of a delimiter for comments of style "b".
  45   7. This character is part of a nestable comment sequence.
  46   8. The char is part of a delimiter for comments of style "c".
  47   Note that any two-character sequence whose first character has flag 1
  48   and whose second character has flag 2 will be interpreted as a comment start.
  49
  50   Bits 6 and 8 are used to discriminate between different comment styles.
  51   Languages such as C++ allow two orthogonal syntax start/end pairs
  52   and bit 6 is used to determine whether a comment-end or Sendcomment
  53   ends style a or b.  Comment markers can start style a, b, c, or bc.
  54   Style a is always the default.
  55   For 2-char comment markers, the style b flag is only looked up on the second
  56   char of the comment marker and on the first char of the comment ender.
  57   For style c (as for the nested flag), the flag can be placed on any
  58   one of the chars.
  59   */
  60
  61 /* These macros extract specific flags from an integer
  62    that holds the syntax code and the flags.  The flags tested here
        occupy bits 16 and up; each macro yields 0 when its flag is clear.  */
  63
  64 #define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)
  65
  66 #define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)
  67
  68 #define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)
  69
  70 #define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)
  71
  72 #define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)
  73
     /* Style b (bit 21) is reported as 1.  Style c (bit 23) is reported as 2
        rather than 1, so that the two results can be OR-ed together into a
        single style number by SYNTAX_FLAGS_COMMENT_STYLE.  */
  74 #define SYNTAX_FLAGS_COMMENT_STYLEB(flags) (((flags) >> 21) & 1)
  75 #define SYNTAX_FLAGS_COMMENT_STYLEC(flags) (((flags) >> 22) & 2)
  76 /* FLAGS should be the flags of the main char of the comment marker, e.g.
  77    the second for comstart and the first for comend.  OTHER_FLAGS are the
        flags of the marker's other char; only their style c bit is used.  */
  78 #define SYNTAX_FLAGS_COMMENT_STYLE(flags, other_flags) \
  79   (SYNTAX_FLAGS_COMMENT_STYLEB (flags) \
  80    | SYNTAX_FLAGS_COMMENT_STYLEC (flags) \
  81    | SYNTAX_FLAGS_COMMENT_STYLEC (other_flags))
  82
     /* The nestable-comment flag is bit 22.  */
  83 #define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
  84
  85 /* These macros extract a particular flag for a given character.  */
  86
  87 #define SYNTAX_COMEND_FIRST(c) \
  88   (SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c)))
  89 #define SYNTAX_PREFIX(c) (SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c)))
  90
  91 /* We use these constants in place of comment-style and
  92    string-ender-char to distinguish comments/strings started by
  93    comment_fence and string_fence codes.  Both values are above 255;
        back_comment stores one of them in place of the delimiter character
        when it meets an Scomment_fence or Sstring_fence character.  */
  94
  95 #define ST_COMMENT_STYLE (256 + 1)
  96 #define ST_STRING_STYLE (256 + 2)
  97
  98 static Lisp_Object Qsyntax_table_p;
  99 static Lisp_Object Qsyntax_table, Qscan_error;
     /* Qsyntax_table_p is the predicate symbol that check_syntax_table hands
        to CHECK_TYPE.  Qsyntax_table is compared with a char-table's purpose
        slot to recognize syntax tables, and is also the text property that
        update_syntax_table looks up with textget.  */
 100
 101 #ifndef __GNUC__
 102 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
 103    if not compiled with GCC.  No need to mark it, since it is used
 104    only very temporarily.  */
 105 Lisp_Object syntax_temp;
 106 #endif
 107
 108 /* This is the internal form of the parse state used in parse-partial-sexp.
        In this file it is also filled in by scan_sexps_forward on behalf of
        back_comment, which reads the incomment, comstyle and comstr_start
        fields.  */
 109
 110 struct lisp_parse_state
 111   {
 112     int depth;     /* Depth at end of parsing.  */
 113     int instring;  /* -1 if not within string, else desired terminator.  */
 114     int incomment; /* -1 if in unnestable comment, else comment nesting.
                           back_comment treats 0 as "not inside a comment".  */
 115     int comstyle;  /* comment style a=0, or b=1, or ST_COMMENT_STYLE.
                           NOTE(review): back_comment compares this with its
                           COMSTYLE argument, and SYNTAX_FLAGS_COMMENT_STYLE
                           can also produce 2 or 3 (style c); this list looks
                           out of date -- confirm.  */
 116     int quoted;    /* Nonzero if just after an escape char at end of parsing */
 117     int mindepth;  /* Minimum depth seen while scanning.  */
 118     /* Char number of most recent start-of-expression at current level */
 119     EMACS_INT thislevelstart;
 120     /* Char number of start of containing expression */
 121     EMACS_INT prevlevelstart;
 122     EMACS_INT location;      /* Char number at which parsing stopped.  */
 123     EMACS_INT comstr_start;  /* Position of last comment/string starter.  */
 124     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
 125                                 of levels (starting from outermost).  */
 126   };
 127 \f
 128 /* These variables are a cache for finding the start of a defun.
 129    find_start_pos is the place for which the defun start was found.
 130    find_start_value is the defun start position found for it.
 131    find_start_value_byte is the corresponding byte position.
 132    find_start_buffer is the buffer it was found in.
 133    find_start_begv is the BEGV value when it was found.
 134    find_start_modiff is the value of MODIFF when it was found.

        find_defun_start maintains them.  find_start_value_byte doubles as a
        second return value of that function: back_comment reads it right
        after the call to get the byte position of the result.  */
 135
 136 static EMACS_INT find_start_pos;
 137 static EMACS_INT find_start_value;
 138 static EMACS_INT find_start_value_byte;
 139 static struct buffer *find_start_buffer;
 140 static EMACS_INT find_start_begv;
 141 static int find_start_modiff;
 142
 143
 144 INFUN (Fsyntax_table_p, 1);
 145 static Lisp_Object skip_chars (int, Lisp_Object, Lisp_Object, int);
 146 static Lisp_Object skip_syntaxes (int, Lisp_Object, Lisp_Object);
 147 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, int);
 148 static void scan_sexps_forward (struct lisp_parse_state *,
 149                                 EMACS_INT, EMACS_INT, EMACS_INT, int,
 150                                 int, Lisp_Object, int);
 151 static int in_classes (int, Lisp_Object);
 152 \f
 153 /* Nonzero if the syntax entry of character C has the prefix flag set.  */
 154 int syntax_prefix_flag_p (int c)
 155 {
 156   return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
 157 }
 158
 159 struct gl_state_s gl_state;             /* Global state of syntax parser.  */
 160
 161 #define INTERVALS_AT_ONCE 10            /* 1 + max-number of intervals
 162                                            to scan to property-change.  */
     /* update_syntax_table stops extending the valid range once its interval
        counter reaches INTERVALS_AT_ONCE.  */
 163
 164 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
 165    sign of COUNT gives the relative position of CHARPOS wrt the previously
 166    valid interval.  If INIT, only [be]_property fields of gl_state are
 167    valid at start, the rest is filled based on OBJECT.
 168
 169    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
 170    direction than the intervals - or in an interval.  We update the
 171    current syntax-table based on the property of this interval, and
 172    update the interval to start further than CHARPOS - or be
 173    NULL_INTERVAL.  We also update lim_property to be the next value of
 174    charpos to call this subroutine again - or be before/after the
 175    start/end of OBJECT.

        Unless INIT, signal an error if the interval cached for the direction
        of COUNT is null, or if CHARPOS lies on the side of that interval
        opposite to the direction of COUNT.  */
 176
 177 void
 178 update_syntax_table (EMACS_INT charpos, int count, int init,
 179                      Lisp_Object object)
 180 {
 181   Lisp_Object tmp_table;
 182   unsigned cnt = 0;
 183   int invalidate = 1;
 184   INTERVAL i;
 185
 186   if (init)
 187     {
 188       gl_state.old_prop = Qnil;
 189       gl_state.start = gl_state.b_property;
 190       gl_state.stop = gl_state.e_property;
 191       i = interval_of (charpos, object);
 192       gl_state.backward_i = gl_state.forward_i = i;
 193       invalidate = 0;
           /* No interval at CHARPOS: leave the range as the caller set it.  */
 194       if (NULL_INTERVAL_P (i))
 195         return;
 196       /* interval_of updates only ->position of the return value, so
 197          update the parents manually to speed up update_interval.  */
 198       while (!NULL_PARENT (i))
 199         {
 200           if (AM_RIGHT_CHILD (i))
 201             INTERVAL_PARENT (i)->position = i->position
 202               - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
 203               - TOTAL_LENGTH (INTERVAL_PARENT (i))
 204               + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
 205           else
 206             INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
 207               + TOTAL_LENGTH (i);
 208           i = INTERVAL_PARENT (i);
 209         }
           /* Back to the interval at CHARPOS; the valid range is exactly it.  */
 210       i = gl_state.forward_i;
 211       gl_state.b_property = i->position - gl_state.offset;
 212       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 213       goto update;
 214     }
       /* Not initializing: resume from the interval cached for the
          direction of travel.  */
 215   i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
 216
 217   /* We are guaranteed to be called with CHARPOS either in i,
 218      or further off.  */
 219   if (NULL_INTERVAL_P (i))
 220     error ("Error in syntax_table logic for to-the-end intervals");
 221   else if (charpos < i->position)               /* Move left.  */
 222     {
 223       if (count > 0)
 224         error ("Error in syntax_table logic for intervals <-");
 225       /* Update the interval.  */
 226       i = update_interval (i, charpos);
           /* If I does not end where the old range began, it is not adjacent
              to that range, so the range now ends with I.  Otherwise
              INVALIDATE stays set and the table comparison below decides.  */
 227       if (INTERVAL_LAST_POS (i) != gl_state.b_property)
 228         {
 229           invalidate = 0;
 230           gl_state.forward_i = i;
 231           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 232         }
 233     }
 234   else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
 235     {
 236       if (count < 0)
 237         error ("Error in syntax_table logic for intervals ->");
 238       /* Update the interval.  */
 239       i = update_interval (i, charpos);
           /* Likewise: if I does not begin where the old range ended, the
              range now begins with I.  */
 240       if (i->position != gl_state.e_property)
 241         {
 242           invalidate = 0;
 243           gl_state.backward_i = i;
 244           gl_state.b_property = i->position - gl_state.offset;
 245         }
 246     }
 247
 248   update:
 249   tmp_table = textget (i->plist, Qsyntax_table);
 250
 251   if (invalidate)
 252     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
 253
 254   if (invalidate)               /* Did not get to adjacent interval.  */
 255     {                           /* with the same table => */
 256                                 /* invalidate the old range.  */
 257       if (count > 0)
 258         {
 259           gl_state.backward_i = i;
 260           gl_state.b_property = i->position - gl_state.offset;
 261         }
 262       else
 263         {
 264           gl_state.forward_i = i;
 265           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 266         }
 267     }
 268
       /* If the property value changed, install it: a syntax table is used
          as is; a cons is stored in global_code with use_global set; any
          other value selects the current buffer's syntax table.  */
 269   if (!EQ (tmp_table, gl_state.old_prop))
 270     {
 271       gl_state.current_syntax_table = tmp_table;
 272       gl_state.old_prop = tmp_table;
 273       if (EQ (Fsyntax_table_p (tmp_table), Qt))
 274         {
 275           gl_state.use_global = 0;
 276         }
 277       else if (CONSP (tmp_table))
 278         {
 279           gl_state.use_global = 1;
 280           gl_state.global_code = tmp_table;
 281         }
 282       else
 283         {
 284           gl_state.use_global = 0;
 285           gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
 286         }
 287     }
 288
       /* Extend the range in the direction of COUNT over the following
          intervals for as long as they carry the same Qsyntax_table
          property, giving up once CNT reaches INTERVALS_AT_ONCE.  CNT is 0
          for the interval we start on, which is never compared.  */
 289   while (!NULL_INTERVAL_P (i))
 290     {
 291       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
 292         {
 293           if (count > 0)
 294             {
 295               gl_state.e_property = i->position - gl_state.offset;
 296               gl_state.forward_i = i;
 297             }
 298           else
 299             {
 300               gl_state.b_property
 301                 = i->position + LENGTH (i) - gl_state.offset;
 302               gl_state.backward_i = i;
 303             }
 304           return;
 305         }
 306       else if (cnt == INTERVALS_AT_ONCE)
 307         {
 308           if (count > 0)
 309             {
 310               gl_state.e_property
 311                 = i->position + LENGTH (i) - gl_state.offset
 312                 /* e_property at EOB is not set to ZV but to ZV+1, so that
 313                    we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
 314                    having to check eob between the two.  */
 315                 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
 316               gl_state.forward_i = i;
 317             }
 318           else
 319             {
 320               gl_state.b_property = i->position - gl_state.offset;
 321               gl_state.backward_i = i;
 322             }
 323           return;
 324         }
 325       cnt++;
 326       i = count > 0 ? next_interval (i) : previous_interval (i);
 327     }
 328   eassert (NULL_INTERVAL_P (i)); /* This property goes to the end.  */
       /* We ran out of intervals: the range reaches the limit saved in
          gl_state.stop or gl_state.start.  */
 329   if (count > 0)
 330     gl_state.e_property = gl_state.stop;
 331   else
 332     gl_state.b_property = gl_state.start;
 333 }
 334 \f
 335 /* Return nonzero if the char at CHARPOS (byte position BYTEPOS) is quoted,
 336    i.e. preceded by an odd-length run of Scharquote/Sescape characters; the
 337    backward scan stops at BEGV.  Global syntax-table data must be valid at
 338    CHARPOS or after on entry, and is valid for lookup at CHARPOS on return.  */
 339
 340 static int
 341 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
 342 {
 343   EMACS_INT start = charpos;    /* Where to re-sync the syntax data.  */
 344   EMACS_INT limit = BEGV;
 345   int odd = 0;                  /* Parity of the quoting chars seen.  */
 346
 347   /* Each quoting character stepped over flips the parity.  */
 348   for (; charpos > limit; odd ^= 1)
 349     {
 350       enum syntaxcode sc;
 351       int ch;
 352
 353       DEC_BOTH (charpos, bytepos);
 354       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
 355       ch = FETCH_CHAR_AS_MULTIBYTE (bytepos);
 356       sc = SYNTAX (ch);
 357       if (sc != Scharquote && sc != Sescape)
 358         break;
 359     }
 360
 361   UPDATE_SYNTAX_TABLE (start);
 362   return odd;
 363 }
 364
 365 /* Step BYTEPOS back over one character and return the resulting bytepos.
 366    BYTEPOS is assumed not to be at the start of the buffer.  */
 367 static INLINE EMACS_INT
 368 dec_bytepos (EMACS_INT bytepos)
 369 {
 370   if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))
 371     {
 372       DEC_POS (bytepos);
 373       return bytepos;
 374     }
 375   return bytepos - 1;           /* Multibyte disabled: step one byte.  */
 376 }
 377 \f
 378 /* Return a defun-start position before POS and not too far before.
 379    It should be the last one before POS, or nearly the last.
        POS_BYTE is the byte position corresponding to POS.
 380
 381    When open_paren_in_column_0_is_defun_start is zero,
 382    only the beginning of the buffer is treated as a defun-start.
 383
 384    We record the information about where the scan started
 385    and what its result was, so that another call in the same area
 386    can return the same value very quickly.  The byte position of
        the value returned is left in find_start_value_byte.
 387
 388    There is no promise at which position the global syntax data is
 389    valid on return from the subroutine, so the caller should explicitly
 390    update the global data.  */
 391
 392 static EMACS_INT
 393 find_defun_start (EMACS_INT pos, EMACS_INT pos_byte)
 394 {
 395   EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
 396
 397   if (!open_paren_in_column_0_is_defun_start)
 398     {
           /* Record a complete cache entry, not only the byte position.
              Otherwise a later call that hits the cache below would return
              a stale find_start_value while find_start_value_byte, which
              callers read alongside the return value, says BEGV_BYTE.  */
           find_start_value = BEGV;
 399       find_start_value_byte = BEGV_BYTE;
           find_start_buffer = current_buffer;
           find_start_modiff = MODIFF;
           find_start_begv = BEGV;
           find_start_pos = pos;
 400       return BEGV;
 401     }
 402
 403   /* Use previous finding, if it's valid and applies to this inquiry.  */
 404   if (current_buffer == find_start_buffer
 405       /* Reuse the defun-start even if POS is a little farther on.
 406          POS might be in the next defun, but that's ok.
 407          Our value may not be the best possible, but will still be usable.  */
 408       && pos <= find_start_pos + 1000
 409       && pos >= find_start_value
 410       && BEGV == find_start_begv
 411       && MODIFF == find_start_modiff)
 412     return find_start_value;
 413
 414   /* Back up to start of line.  */
 415   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
 416
 417   /* We optimize syntax-table lookup for rare updates.  Thus we accept
 418      only those `^\s(' which are good in global _and_ text-property
 419      syntax-tables.  */
 420   SETUP_BUFFER_SYNTAX_TABLE ();
 421   while (PT > BEGV)
 422     {
 423       int c;
 424
 425       /* Open-paren at start of line means we may have found our
 426          defun-start.  */
 427       c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 428       if (SYNTAX (c) == Sopen)
 429         {
 430           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
 431           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 432           if (SYNTAX (c) == Sopen)
 433             break;
 434           /* Now fallback to the default value.  */
 435           SETUP_BUFFER_SYNTAX_TABLE ();
 436         }
 437       /* Move to beg of previous line.  */
 438       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
 439     }
 440
 441   /* Record what we found, for the next try.  */
 442   find_start_value = PT;
 443   find_start_value_byte = PT_BYTE;
 444   find_start_buffer = current_buffer;
 445   find_start_modiff = MODIFF;
 446   find_start_begv = BEGV;
 447   find_start_pos = pos;
 448
       /* The scan used point as its cursor; put point back.  */
 449   TEMP_SET_PT_BOTH (opoint, opoint_byte);
 450
 451   return find_start_value;
 452 }
 453 \f
 454 /* Return the comment-end-first flag of the character just before POS
 455    (byte position POS_BYTE).  */
 456 static int
 457 prev_char_comend_first (EMACS_INT pos, EMACS_INT pos_byte)
 458 {
 459   int ch, flag;
 460
 461   DEC_BOTH (pos, pos_byte);
 462   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 463   ch = FETCH_CHAR (pos_byte);
 464   flag = SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (ch));
 465   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 466   return flag;
 467 }
 468
 469 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE.  */
 470
 471 /* static int
 472  * prev_char_comstart_first (pos, pos_byte)
 473  *      int pos, pos_byte;
 474  * {
 475  *   int c, val;
 476  *
 477  *   DEC_BOTH (pos, pos_byte);
 478  *   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 479  *   c = FETCH_CHAR (pos_byte);
 480  *   val = SYNTAX_COMSTART_FIRST (c);
 481  *   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 482  *   return val;
 483  * } */
 484
 485 /* Checks whether charpos FROM is at the end of a comment.
 486    FROM_BYTE is the bytepos corresponding to FROM.
 487    Do not move back before STOP.
        COMNESTED and COMSTYLE give the nestedness and the style of the
        comment being looked for; comment starters whose flags do not match
        both are ignored.
 488
 489    Return a positive value if we find a comment ending at FROM/FROM_BYTE
        (the value is the charpos of the comment's beginning);
 490    return -1 otherwise.
 491
 492    If successful, store the charpos of the comment's beginning
 493    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
        On failure FROM and FROM_BYTE themselves are stored there.
 494
 495    Global syntax data remains valid for backward search starting at
 496    the returned value (or at FROM, if the search was not successful).  */
 497
 498 static int
 499 back_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop, int comnested, int comstyle, EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr)
 500 {
 501   /* Look back, counting the parity of string-quotes,
 502      and recording the comment-starters seen.
 503      When we reach a safe place, assume that's not in a string;
 504      then step the main scan to the earliest comment-starter seen
 505      an even number of string quotes away from the safe place.
 506
 507      OFROM[I] is position of the earliest comment-starter seen
 508      which is I+2X quotes from the comment-end.
 509      PARITY is current parity of quotes from the comment end.  */
 510   int string_style = -1;        /* Presumed outside of any string. */
 511   int string_lossage = 0;
 512   /* Not a real lossage: indicates that we have passed a matching comment
 513      starter plus a non-matching comment-ender, meaning that any matching
 514      comment-starter we might see later could be a false positive (hidden
 515      inside another comment).
 516      Test case:  { a (* b } c (* d *) */
 517   int comment_lossage = 0;
 518   EMACS_INT comment_end = from;
 519   EMACS_INT comment_end_byte = from_byte;
       /* Best comment starter found so far; 0 means none yet.  */
 520   EMACS_INT comstart_pos = 0;
 521   EMACS_INT comstart_byte IF_LINT (= 0);
 522   /* Place where the containing defun starts,
 523      or 0 if we didn't come across it yet.  */
 524   EMACS_INT defun_start = 0;
 525   EMACS_INT defun_start_byte = 0;
 526   register enum syntaxcode code;
 527   int nesting = 1;              /* current comment nesting */
 528   int c;
       /* Syntax flags of the char examined on the current iteration; copied
          to PREV_SYNTAX at the start of the next one.  */
 529   int syntax = 0;
 530
 531   /* FIXME: A }} comment-ender style leads to incorrect behavior
 532      in the case of {{ c }}} because we ignore the last two chars which are
 533      assumed to be comment-enders although they aren't.  */
 534
 535   /* At beginning of range to scan, we're outside of strings;
 536      that determines quote parity to the comment-end.  */
 537   while (from != stop)
 538     {
 539       EMACS_INT temp_byte;
 540       int prev_syntax, com2start, com2end;
 541       int comstart;
 542
 543       /* Move back and examine a character.  */
 544       DEC_BOTH (from, from_byte);
 545       UPDATE_SYNTAX_TABLE_BACKWARD (from);
 546
 547       prev_syntax = syntax;
 548       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 549       syntax = SYNTAX_WITH_FLAGS (c);
 550       code = SYNTAX (c);
 551
 552       /* Check for 2-char comment markers.  */
 553       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
 554                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
 555                    && (comstyle
 556                        == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
 557                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
 558                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
 559       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
 560                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
 561       comstart = (com2start || code == Scomment);
 562
 563       /* Nasty cases with overlapping 2-char comment markers:
 564          - snmp-mode: -- c -- foo -- c --
 565                       --- c --
 566                       ------ c --
 567          - c-mode:    *||*
 568                       |* *|* *|
 569                       |*| |* |*|
 570                       ///   */
 571
 572       /* If a 2-char comment sequence partly overlaps with another,
 573          we don't try to be clever.  E.g. |*| in C, or }% in modes that
 574          have %..\n and %{..}%.  */
 575       if (from > stop && (com2end || comstart))
 576         {
 577           EMACS_INT next = from, next_byte = from_byte;
 578           int next_c, next_syntax;
 579           DEC_BOTH (next, next_byte);
 580           UPDATE_SYNTAX_TABLE_BACKWARD (next);
 581           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
 582           next_syntax = SYNTAX_WITH_FLAGS (next_c);
 583           if (((comstart || comnested)
 584                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
 585                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
 586               || ((com2end || comnested)
 587                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
 588                   && (comstyle
 589                       == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
 590                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
 591             goto lossage;
 592           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
 593         }
 594
 595       if (com2start && comstart_pos == 0)
 596         /* We're looking at a comment starter.  But it might be a comment
 597            ender as well (see snmp-mode).  The first time we see one, we
 598            need to consider it as a comment starter,
 599            and the subsequent times as a comment ender.  */
 600         com2end = 0;
 601
 602       /* Turn a 2-char comment sequences into the appropriate syntax.  */
 603       if (com2end)
 604         code = Sendcomment;
 605       else if (com2start)
 606         code = Scomment;
 607       /* Ignore comment starters of a different style.  */
 608       else if (code == Scomment
 609                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
 610                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
 611         continue;
 612
 613       /* Ignore escaped characters, except comment-enders.  */
 614       if (code != Sendcomment && char_quoted (from, from_byte))
 615         continue;
 616
 617       switch (code)
 618         {
 619         case Sstring_fence:
 620         case Scomment_fence:
 621           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
               /* Fall through: a fence is tracked like a string delimiter,
                  with the pseudo-character just stored in C.  */
 622         case Sstring:
 623           /* Track parity of quotes.  */
 624           if (string_style == -1)
 625             /* Entering a string.  */
 626             string_style = c;
 627           else if (string_style == c)
 628             /* Leaving the string.  */
 629             string_style = -1;
 630           else
 631             /* If we have two kinds of string delimiters.
 632                There's no way to grok this scanning backwards.  */
 633             string_lossage = 1;
 634           break;
 635
 636         case Scomment:
 637           /* We've already checked that it is the relevant comstyle.  */
 638           if (string_style != -1 || comment_lossage || string_lossage)
 639             /* There are odd string quotes involved, so let's be careful.
 640                Test case in Pascal: " { " a { " } */
 641             goto lossage;
 642
 643           if (!comnested)
 644             {
 645               /* Record best comment-starter so far.  */
 646               comstart_pos = from;
 647               comstart_byte = from_byte;
 648             }
 649           else if (--nesting <= 0)
 650             /* nested comments have to be balanced, so we don't need to
 651                keep looking for earlier ones.  We use here the same (slightly
 652                incorrect) reasoning as below:  since it is followed by uniform
 653                paired string quotes, this comment-start has to be outside of
 654                strings, else the comment-end itself would be inside a string. */
 655             goto done;
 656           break;
 657
 658         case Sendcomment:
 659           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
 660               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
 661                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
 662             /* This is the same style of comment ender as ours. */
 663             {
 664               if (comnested)
 665                 nesting++;
 666               else
 667                 /* Anything before that can't count because it would match
 668                    this comment-ender rather than ours.  */
 669                 from = stop;    /* Break out of the loop.  */
 670             }
 671           else if (comstart_pos != 0 || c != '\n')
 672             /* We're mixing comment styles here, so we'd better be careful.
 673                The (comstart_pos != 0 || c != '\n') check is not quite correct
 674                (we should just always set comment_lossage), but removing it
 675                would imply that any multiline comment in C would go through
 676                lossage, which seems overkill.
 677                The failure should only happen in the rare cases such as
 678                  { (* } *)   */
 679             comment_lossage = 1;
 680           break;
 681
 682         case Sopen:
 683           /* Assume a defun-start point is outside of strings.  */
 684           if (open_paren_in_column_0_is_defun_start
 685               && (from == stop
 686                   || (temp_byte = dec_bytepos (from_byte),
 687                       FETCH_CHAR (temp_byte) == '\n')))
 688             {
 689               defun_start = from;
 690               defun_start_byte = from_byte;
 691               from = stop;      /* Break out of the loop.  */
 692             }
 693           break;
 694
 695         default:
 696           break;
 697         }
 698     }
 699
       /* No usable comment starter was recorded: report failure by handing
          back the original FROM.  */
 700   if (comstart_pos == 0)
 701     {
 702       from = comment_end;
 703       from_byte = comment_end_byte;
 704       UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
 705     }
 706   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
 707      or `done'), then we've found the beginning of the non-nested comment.  */
       /* The condition below is always true, so the final `else' block is
          entered only through the `lossage' label inside it.  */
 708   else if (1)   /* !comnested */
 709     {
 710       from = comstart_pos;
 711       from_byte = comstart_byte;
 712       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 713     }
 714   else
 715     {
 716       struct lisp_parse_state state;
 717     lossage:
 718       /* We had two kinds of string delimiters mixed up
 719          together.  Decode this going forwards.
 720          Scan fwd from a known safe place (beginning-of-defun)
 721          to the one in question; this records where we
 722          last passed a comment starter.  */
 723       /* If we did not already find the defun start, find it now.  */
 724       if (defun_start == 0)
 725         {
 726           defun_start = find_defun_start (comment_end, comment_end_byte);
 727           defun_start_byte = find_start_value_byte;
 728         }
           /* Each pass scans forward up to COMMENT_END.  The loop repeats
              only when DEFUN_START is moved to just inside a surrounding
              comment, below.  */
 729       do
 730         {
 731           scan_sexps_forward (&state,
 732                               defun_start, defun_start_byte,
 733                               comment_end, -10000, 0, Qnil, 0);
 734           defun_start = comment_end;
 735           if (state.incomment == (comnested ? 1 : -1)
 736               && state.comstyle == comstyle)
 737             from = state.comstr_start;
 738           else
 739             {
 740               from = comment_end;
 741               if (state.incomment)
 742                 /* If comment_end is inside some other comment, maybe ours
 743                    is nested, so we need to try again from within the
 744                    surrounding comment.  Example: { a (* " *)  */
 745                 {
 746                   /* FIXME: We should advance by one or two chars. */
 747                   defun_start = state.comstr_start + 2;
 748                   defun_start_byte = CHAR_TO_BYTE (defun_start);
 749                 }
 750             }
 751         } while (defun_start < comment_end);
 752
 753       from_byte = CHAR_TO_BYTE (from);
 754       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 755     }
 756
 757  done:
 758   *charpos_ptr = from;
 759   *bytepos_ptr = from_byte;
 760
 761   return (from == comment_end) ? -1 : from;
 762 }
 763 \f
 764 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
 765        doc: /* Return t if OBJECT is a syntax table.
 766 Currently, any char-table counts as a syntax table.  */)
 767   (Lisp_Object object)
 768 {
 769   /* Only a char-table whose purpose is Qsyntax_table qualifies.  */
 770   return ((CHAR_TABLE_P (object)
 771            && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
 772           ? Qt : Qnil);
 773 }
 774
 775 /* Check with CHECK_TYPE, naming Qsyntax_table_p, that OBJ is a syntax table.  */
 776 static void
 777 check_syntax_table (Lisp_Object obj)
 778 {
 779   CHECK_TYPE (EQ (Fsyntax_table_p (obj), Qt), Qsyntax_table_p, obj);
 780 }
 781
 782 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
 783        doc: /* Return the current syntax table.
 784 This is the one specified by the current buffer.  */)
 785   (void)
 786 {
 787   return BVAR (current_buffer, syntax_table);
 788 }
 789
 790 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
 791    Sstandard_syntax_table, 0, 0, 0,
 792        doc: /* Return the standard syntax table.
 793 This is the one used for new buffers.  */)
 794   (void)
 795 {
 796   return Vstandard_syntax_table;
 797 }
 798
 799 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
 800        doc: /* Construct a new syntax table and return it.
 801 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
 802   (Lisp_Object table)
 803 {
 804   Lisp_Object copy;
 805
 806   if (!NILP (table))
 807     check_syntax_table (table);
 808   else
 809     table = Vstandard_syntax_table;
 810
 811   copy = Fcopy_sequence (table);
 812
 813   /* Only the standard syntax table should have a default element.
 814      Other syntax tables should inherit from parents instead.  */
 815   XCHAR_TABLE (copy)->defalt = Qnil;
 816
 817   /* Copied syntax tables should all have parents.
 818      If we copied one with no parent, such as the standard syntax table,
 819      use the standard syntax table as the copy's parent.  */
 820   if (NILP (XCHAR_TABLE (copy)->parent))
 821     Fset_char_table_parent (copy, Vstandard_syntax_table);
 822   return copy;
 823 }
 824
 825 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
 826        doc: /* Select a new syntax table for the current buffer.
 827 One argument, a syntax table.  */)
 828   (Lisp_Object table)
 829 {
 830   int idx;
 831   check_syntax_table (table);
 832   BVAR (current_buffer, syntax_table) = table;
 833   /* Indicate that this buffer now has a specified syntax table.  */
 834   idx = PER_BUFFER_VAR_IDX (syntax_table);
 835   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
 836   return table;
 837 }
 838 \f
 839 /* Convert a letter which signifies a syntax code
 840  into the code it signifies.
 841  This is used by modify-syntax-entry, and other things.  */
 842
 843 unsigned char syntax_spec_code[0400] =
 844   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 845     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 846     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 847     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 848     (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
 849         (char) Smath, 0377, 0377, (char) Squote,
 850     (char) Sopen, (char) Sclose, 0377, 0377,
 851         0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
 852     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 853     0377, 0377, 0377, 0377,
 854         (char) Scomment, 0377, (char) Sendcomment, 0377,
 855     (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
 856     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 857     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 858     0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
 859     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
 860     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 861     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 862     0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
 863   };
 864
 865 /* Indexed by syntax code, give the letter that describes it.  */
 866
 867 char syntax_code_spec[16] =
 868   {
 869     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
 870     '!', '|'
 871   };
 872
/* Indexed by syntax code, give the object (cons of syntax code and
   nil) to be stored in syntax table.  Since these objects can be
   shared among syntax tables, we generate them in advance.  By
   sharing objects, the function `describe-syntax' can give a more
   compact listing.

   Fstring_to_syntax accesses this as a vector: it returns the element
   at the computed value when the descriptor has no matching character
   and the value is below the vector's size, and conses a new object
   otherwise.  */
static Lisp_Object Vsyntax_code_object;
 879
 880 \f
 881 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
 882        doc: /* Return the syntax code of CHARACTER, described by a character.
 883 For example, if CHARACTER is a word constituent, the
 884 character `w' (119) is returned.
 885 The characters that correspond to various syntax codes
 886 are listed in the documentation of `modify-syntax-entry'.  */)
 887   (Lisp_Object character)
 888 {
 889   int char_int;
 890   CHECK_CHARACTER (character);
 891   char_int = XINT (character);
 892   SETUP_BUFFER_SYNTAX_TABLE ();
 893   return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
 894 }
 895
 896 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
 897        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
 898   (Lisp_Object character)
 899 {
 900   int char_int, code;
 901   CHECK_NUMBER (character);
 902   char_int = XINT (character);
 903   SETUP_BUFFER_SYNTAX_TABLE ();
 904   code = SYNTAX (char_int);
 905   if (code == Sopen || code == Sclose)
 906     return SYNTAX_MATCH (char_int);
 907   return Qnil;
 908 }
 909
 910 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
 911        doc: /* Convert a syntax specification STRING into syntax cell form.
 912 STRING should be a string as it is allowed as argument of
 913 `modify-syntax-entry'.  Value is the equivalent cons cell
 914 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
 915 text property.  */)
 916   (Lisp_Object string)
 917 {
 918   register const unsigned char *p;
 919   register enum syntaxcode code;
 920   int val;
 921   Lisp_Object match;
 922
 923   CHECK_STRING (string);
 924
 925   p = SDATA (string);
 926   code = (enum syntaxcode) syntax_spec_code[*p++];
 927   if (((int) code & 0377) == 0377)
 928     error ("Invalid syntax description letter: %c", p[-1]);
 929
 930   if (code == Sinherit)
 931     return Qnil;
 932
 933   if (*p)
 934     {
 935       int len;
 936       int character = STRING_CHAR_AND_LENGTH (p, len);
 937       XSETINT (match, character);
 938       if (XFASTINT (match) == ' ')
 939         match = Qnil;
 940       p += len;
 941     }
 942   else
 943     match = Qnil;
 944
 945   val = (int) code;
 946   while (*p)
 947     switch (*p++)
 948       {
 949       case '1':
 950         val |= 1 << 16;
 951         break;
 952
 953       case '2':
 954         val |= 1 << 17;
 955         break;
 956
 957       case '3':
 958         val |= 1 << 18;
 959         break;
 960
 961       case '4':
 962         val |= 1 << 19;
 963         break;
 964
 965       case 'p':
 966         val |= 1 << 20;
 967         break;
 968
 969       case 'b':
 970         val |= 1 << 21;
 971         break;
 972
 973       case 'n':
 974         val |= 1 << 22;
 975         break;
 976
 977       case 'c':
 978         val |= 1 << 23;
 979         break;
 980       }
 981
 982   if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
 983     return XVECTOR (Vsyntax_code_object)->contents[val];
 984   else
 985     /* Since we can't use a shared object, let's make a new one.  */
 986     return Fcons (make_number (val), match);
 987 }
 988
 989 /* I really don't know why this is interactive
 990    help-form should at least be made useful whilst reading the second arg.  */
 991 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
 992   "cSet syntax for character: \nsSet syntax for %s to: ",
 993        doc: /* Set syntax for character CHAR according to string NEWENTRY.
 994 The syntax is changed only for table SYNTAX-TABLE, which defaults to
 995  the current buffer's syntax table.
 996 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
 997 in the range MIN to MAX are changed.
 998 The first character of NEWENTRY should be one of the following:
 999   Space or -  whitespace syntax.    w   word constituent.
1000   _           symbol constituent.   .   punctuation.
1001   (           open-parenthesis.     )   close-parenthesis.
1002   "           string quote.         \\   escape.
1003   $           paired delimiter.     '   expression quote or prefix operator.
1004   <           comment starter.      >   comment ender.
1005   /           character-quote.      @   inherit from `standard-syntax-table'.
1006   |           generic string fence. !   generic comment fence.
1007
1008 Only single-character comment start and end sequences are represented thus.
1009 Two-character sequences are represented as described below.
1010 The second character of NEWENTRY is the matching parenthesis,
1011  used only if the first character is `(' or `)'.
1012 Any additional characters are flags.
1013 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1014  1 means CHAR is the start of a two-char comment start sequence.
1015  2 means CHAR is the second character of such a sequence.
1016  3 means CHAR is the start of a two-char comment end sequence.
1017  4 means CHAR is the second character of such a sequence.
1018
1019 There can be several orthogonal comment sequences.  This is to support
1020 language modes such as C++.  By default, all comment sequences are of style
1021 a, but you can set the comment sequence style to b (on the second character
1022 of a comment-start, and the first character of a comment-end sequence) and/or
1023 c (on any of its chars) using this flag:
1024  b means CHAR is part of comment sequence b.
1025  c means CHAR is part of comment sequence c.
1026  n means CHAR is part of a nestable comment sequence.
1027
1028  p means CHAR is a prefix character for `backward-prefix-chars';
1029    such characters are treated as whitespace when they occur
1030    between expressions.
1031 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1032   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1033 {
1034   if (CONSP (c))
1035     {
1036       CHECK_CHARACTER_CAR (c);
1037       CHECK_CHARACTER_CDR (c);
1038     }
1039   else
1040     CHECK_CHARACTER (c);
1041
1042   if (NILP (syntax_table))
1043     syntax_table = BVAR (current_buffer, syntax_table);
1044   else
1045     check_syntax_table (syntax_table);
1046
1047   newentry = Fstring_to_syntax (newentry);
1048   if (CONSP (c))
1049     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1050   else
1051     SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1052
1053   /* We clear the regexp cache, since character classes can now have
1054      different values from those in the compiled regexps.*/
1055   clear_regexp_cache ();
1056
1057   return Qnil;
1058 }
1059 \f
1060 /* Dump syntax table to buffer in human-readable format */
1061
1062 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1063        Sinternal_describe_syntax_value, 1, 1, 0,
1064        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1065   (Lisp_Object syntax)
1066 {
1067   register enum syntaxcode code;
1068   int syntax_code;
1069   char desc, start1, start2, end1, end2, prefix,
1070     comstyleb, comstylec, comnested;
1071   char str[2];
1072   Lisp_Object first, match_lisp, value = syntax;
1073
1074   if (NILP (value))
1075     {
1076       insert_string ("default");
1077       return syntax;
1078     }
1079
1080   if (CHAR_TABLE_P (value))
1081     {
1082       insert_string ("deeper char-table ...");
1083       return syntax;
1084     }
1085
1086   if (!CONSP (value))
1087     {
1088       insert_string ("invalid");
1089       return syntax;
1090     }
1091
1092   first = XCAR (value);
1093   match_lisp = XCDR (value);
1094
1095   if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1096     {
1097       insert_string ("invalid");
1098       return syntax;
1099     }
1100
1101   syntax_code = XINT (first);
1102   code = (enum syntaxcode) (syntax_code & 0377);
1103   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1104   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);;
1105   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1106   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1107   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1108   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1109   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1110   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1111
1112   if ((int) code < 0 || (int) code >= (int) Smax)
1113     {
1114       insert_string ("invalid");
1115       return syntax;
1116     }
1117   desc = syntax_code_spec[(int) code];
1118
1119   str[0] = desc, str[1] = 0;
1120   insert (str, 1);
1121
1122   if (NILP (match_lisp))
1123     insert (" ", 1);
1124   else
1125     insert_char (XINT (match_lisp));
1126
1127   if (start1)
1128     insert ("1", 1);
1129   if (start2)
1130     insert ("2", 1);
1131
1132   if (end1)
1133     insert ("3", 1);
1134   if (end2)
1135     insert ("4", 1);
1136
1137   if (prefix)
1138     insert ("p", 1);
1139   if (comstyleb)
1140     insert ("b", 1);
1141   if (comstylec)
1142     insert ("c", 1);
1143   if (comnested)
1144     insert ("n", 1);
1145
1146   insert_string ("\twhich means: ");
1147
1148   switch (SWITCH_ENUM_CAST (code))
1149     {
1150     case Swhitespace:
1151       insert_string ("whitespace"); break;
1152     case Spunct:
1153       insert_string ("punctuation"); break;
1154     case Sword:
1155       insert_string ("word"); break;
1156     case Ssymbol:
1157       insert_string ("symbol"); break;
1158     case Sopen:
1159       insert_string ("open"); break;
1160     case Sclose:
1161       insert_string ("close"); break;
1162     case Squote:
1163       insert_string ("prefix"); break;
1164     case Sstring:
1165       insert_string ("string"); break;
1166     case Smath:
1167       insert_string ("math"); break;
1168     case Sescape:
1169       insert_string ("escape"); break;
1170     case Scharquote:
1171       insert_string ("charquote"); break;
1172     case Scomment:
1173       insert_string ("comment"); break;
1174     case Sendcomment:
1175       insert_string ("endcomment"); break;
1176     case Sinherit:
1177       insert_string ("inherit"); break;
1178     case Scomment_fence:
1179       insert_string ("comment fence"); break;
1180     case Sstring_fence:
1181       insert_string ("string fence"); break;
1182     default:
1183       insert_string ("invalid");
1184       return syntax;
1185     }
1186
1187   if (!NILP (match_lisp))
1188     {
1189       insert_string (", matches ");
1190       insert_char (XINT (match_lisp));
1191     }
1192
1193   if (start1)
1194     insert_string (",\n\t  is the first character of a comment-start sequence");
1195   if (start2)
1196     insert_string (",\n\t  is the second character of a comment-start sequence");
1197
1198   if (end1)
1199     insert_string (",\n\t  is the first character of a comment-end sequence");
1200   if (end2)
1201     insert_string (",\n\t  is the second character of a comment-end sequence");
1202   if (comstyleb)
1203     insert_string (" (comment style b)");
1204   if (comstylec)
1205     insert_string (" (comment style c)");
1206   if (comnested)
1207     insert_string (" (nestable)");
1208
1209   if (prefix)
1210     insert_string (",\n\t  is a prefix character for `backward-prefix-chars'");
1211
1212   return syntax;
1213 }
1214 \f
1215 /* Return the position across COUNT words from FROM.
1216    If that many words cannot be found before the end of the buffer, return 0.
1217    COUNT negative means scan backward and stop at word beginning.  */
1218
1219 EMACS_INT
1220 scan_words (register EMACS_INT from, register EMACS_INT count)
1221 {
1222   register EMACS_INT beg = BEGV;
1223   register EMACS_INT end = ZV;
1224   register EMACS_INT from_byte = CHAR_TO_BYTE (from);
1225   register enum syntaxcode code;
1226   int ch0, ch1;
1227   Lisp_Object func, pos;
1228
1229   immediate_quit = 1;
1230   QUIT;
1231
1232   SETUP_SYNTAX_TABLE (from, count);
1233
1234   while (count > 0)
1235     {
1236       while (1)
1237         {
1238           if (from == end)
1239             {
1240               immediate_quit = 0;
1241               return 0;
1242             }
1243           UPDATE_SYNTAX_TABLE_FORWARD (from);
1244           ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1245           code = SYNTAX (ch0);
1246           INC_BOTH (from, from_byte);
1247           if (words_include_escapes
1248               && (code == Sescape || code == Scharquote))
1249             break;
1250           if (code == Sword)
1251             break;
1252         }
1253       /* Now CH0 is a character which begins a word and FROM is the
1254          position of the next character.  */
1255       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1256       if (! NILP (Ffboundp (func)))
1257         {
1258           pos = call2 (func, make_number (from - 1), make_number (end));
1259           if (INTEGERP (pos) && XINT (pos) > from)
1260             {
1261               from = XINT (pos);
1262               from_byte = CHAR_TO_BYTE (from);
1263             }
1264         }
1265       else
1266         {
1267           while (1)
1268             {
1269               if (from == end) break;
1270               UPDATE_SYNTAX_TABLE_FORWARD (from);
1271               ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1272               code = SYNTAX (ch1);
1273               if ((code != Sword
1274                    && (! words_include_escapes
1275                        || (code != Sescape && code != Scharquote)))
1276                   || word_boundary_p (ch0, ch1))
1277                 break;
1278               INC_BOTH (from, from_byte);
1279               ch0 = ch1;
1280             }
1281         }
1282       count--;
1283     }
1284   while (count < 0)
1285     {
1286       while (1)
1287         {
1288           if (from == beg)
1289             {
1290               immediate_quit = 0;
1291               return 0;
1292             }
1293           DEC_BOTH (from, from_byte);
1294           UPDATE_SYNTAX_TABLE_BACKWARD (from);
1295           ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1296           code = SYNTAX (ch1);
1297           if (words_include_escapes
1298               && (code == Sescape || code == Scharquote))
1299             break;
1300           if (code == Sword)
1301             break;
1302         }
1303       /* Now CH1 is a character which ends a word and FROM is the
1304          position of it.  */
1305       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1306       if (! NILP (Ffboundp (func)))
1307         {
1308           pos = call2 (func, make_number (from), make_number (beg));
1309           if (INTEGERP (pos) && XINT (pos) < from)
1310             {
1311               from = XINT (pos);
1312               from_byte = CHAR_TO_BYTE (from);
1313             }
1314         }
1315       else
1316         {
1317           while (1)
1318             {
1319               if (from == beg)
1320                 break;
1321               DEC_BOTH (from, from_byte);
1322               UPDATE_SYNTAX_TABLE_BACKWARD (from);
1323               ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1324               code = SYNTAX (ch0);
1325               if ((code != Sword
1326                    && (! words_include_escapes
1327                        || (code != Sescape && code != Scharquote)))
1328                   || word_boundary_p (ch0, ch1))
1329                 {
1330                   INC_BOTH (from, from_byte);
1331                   break;
1332                 }
1333               ch1 = ch0;
1334             }
1335         }
1336       count++;
1337     }
1338
1339   immediate_quit = 0;
1340
1341   return from;
1342 }
1343
1344 DEFUE ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1345        doc: /* Move point forward ARG words (backward if ARG is negative).
1346 Normally returns t.
1347 If an edge of the buffer or a field boundary is reached, point is left there
1348 and the function returns nil.  Field boundaries are not noticed if
1349 `inhibit-field-text-motion' is non-nil.  */)
1350   (Lisp_Object arg)
1351 {
1352   Lisp_Object tmp;
1353   int orig_val, val;
1354
1355   if (NILP (arg))
1356     XSETFASTINT (arg, 1);
1357   else
1358     CHECK_NUMBER (arg);
1359
1360   val = orig_val = scan_words (PT, XINT (arg));
1361   if (! orig_val)
1362     val = XINT (arg) > 0 ? ZV : BEGV;
1363
1364   /* Avoid jumping out of an input field.  */
1365   tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1366                              Qt, Qnil, Qnil);
1367   val = XFASTINT (tmp);
1368
1369   SET_PT (val);
1370   return val == orig_val ? Qt : Qnil;
1371 }
1372 \f
1373 DEFUE ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1374        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1375 STRING is like the inside of a `[...]' in a regular expression
1376 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1377  (but not at the end of a range; quoting is never needed there).
1378 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1379 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1380 Char classes, e.g. `[:alpha:]', are supported.
1381
1382 Returns the distance traveled, either zero or positive.  */)
1383   (Lisp_Object string, Lisp_Object lim)
1384 {
1385   return skip_chars (1, string, lim, 1);
1386 }
1387
1388 DEFUE ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1389        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1390 See `skip-chars-forward' for details.
1391 Returns the distance traveled, either zero or negative.  */)
1392   (Lisp_Object string, Lisp_Object lim)
1393 {
1394   return skip_chars (0, string, lim, 1);
1395 }
1396
1397 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1398        doc: /* Move point forward across chars in specified syntax classes.
1399 SYNTAX is a string of syntax code characters.
1400 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1401 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1402 This function returns the distance traveled, either zero or positive.  */)
1403   (Lisp_Object syntax, Lisp_Object lim)
1404 {
1405   return skip_syntaxes (1, syntax, lim);
1406 }
1407
1408 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1409        doc: /* Move point backward across chars in specified syntax classes.
1410 SYNTAX is a string of syntax code characters.
1411 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1412 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1413 This function returns the distance traveled, either zero or negative.  */)
1414   (Lisp_Object syntax, Lisp_Object lim)
1415 {
1416   return skip_syntaxes (0, syntax, lim);
1417 }
1418
1419 static Lisp_Object
1420 skip_chars (int forwardp, Lisp_Object string, Lisp_Object lim, int handle_iso_classes)
1421 {
1422   register unsigned int c;
1423   unsigned char fastmap[0400];
1424   /* Store the ranges of non-ASCII characters.  */
1425   int *char_ranges IF_LINT (= NULL);
1426   int n_char_ranges = 0;
1427   int negate = 0;
1428   register EMACS_INT i, i_byte;
1429   /* Set to 1 if the current buffer is multibyte and the region
1430      contains non-ASCII chars.  */
1431   int multibyte;
1432   /* Set to 1 if STRING is multibyte and it contains non-ASCII
1433      chars.  */
1434   int string_multibyte;
1435   EMACS_INT size_byte;
1436   const unsigned char *str;
1437   int len;
1438   Lisp_Object iso_classes;
1439
1440   CHECK_STRING (string);
1441   iso_classes = Qnil;
1442
1443   if (NILP (lim))
1444     XSETINT (lim, forwardp ? ZV : BEGV);
1445   else
1446     CHECK_NUMBER_COERCE_MARKER (lim);
1447
1448   /* In any case, don't allow scan outside bounds of buffer.  */
1449   if (XINT (lim) > ZV)
1450     XSETFASTINT (lim, ZV);
1451   if (XINT (lim) < BEGV)
1452     XSETFASTINT (lim, BEGV);
1453
1454   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1455                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1456   string_multibyte = SBYTES (string) > SCHARS (string);
1457
1458   memset (fastmap, 0, sizeof fastmap);
1459
1460   str = SDATA (string);
1461   size_byte = SBYTES (string);
1462
1463   i_byte = 0;
1464   if (i_byte < size_byte
1465       && SREF (string, 0) == '^')
1466     {
1467       negate = 1; i_byte++;
1468     }
1469
1470   /* Find the characters specified and set their elements of fastmap.
1471      Handle backslashes and ranges specially.
1472
1473      If STRING contains non-ASCII characters, setup char_ranges for
1474      them and use fastmap only for their leading codes.  */
1475
1476   if (! string_multibyte)
1477     {
1478       int string_has_eight_bit = 0;
1479
1480       /* At first setup fastmap.  */
1481       while (i_byte < size_byte)
1482         {
1483           c = str[i_byte++];
1484
1485           if (handle_iso_classes && c == '['
1486               && i_byte < size_byte
1487               && str[i_byte] == ':')
1488             {
1489               const unsigned char *class_beg = str + i_byte + 1;
1490               const unsigned char *class_end = class_beg;
1491               const unsigned char *class_limit = str + size_byte - 2;
1492               /* Leave room for the null.  */
1493               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1494               re_wctype_t cc;
1495
1496               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1497                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1498
1499               while (class_end < class_limit
1500                      && *class_end >= 'a' && *class_end <= 'z')
1501                 class_end++;
1502
1503               if (class_end == class_beg
1504                   || *class_end != ':' || class_end[1] != ']')
1505                 goto not_a_class_name;
1506
1507               memcpy (class_name, class_beg, class_end - class_beg);
1508               class_name[class_end - class_beg] = 0;
1509
1510               cc = re_wctype (class_name);
1511               if (cc == 0)
1512                 error ("Invalid ISO C character class");
1513
1514               iso_classes = Fcons (make_number (cc), iso_classes);
1515
1516               i_byte = class_end + 2 - str;
1517               continue;
1518             }
1519
1520         not_a_class_name:
1521           if (c == '\\')
1522             {
1523               if (i_byte == size_byte)
1524                 break;
1525
1526               c = str[i_byte++];
1527             }
1528           /* Treat `-' as range character only if another character
1529              follows.  */
1530           if (i_byte + 1 < size_byte
1531               && str[i_byte] == '-')
1532             {
1533               unsigned int c2;
1534
1535               /* Skip over the dash.  */
1536               i_byte++;
1537
1538               /* Get the end of the range.  */
1539               c2 = str[i_byte++];
1540               if (c2 == '\\'
1541                   && i_byte < size_byte)
1542                 c2 = str[i_byte++];
1543
1544               if (c <= c2)
1545                 {
1546                   unsigned lim2 = c2 + 1;
1547                   while (c < lim2)
1548                     fastmap[c++] = 1;
1549                   if (! ASCII_CHAR_P (c2))
1550                     string_has_eight_bit = 1;
1551                 }
1552             }
1553           else
1554             {
1555               fastmap[c] = 1;
1556               if (! ASCII_CHAR_P (c))
1557                 string_has_eight_bit = 1;
1558             }
1559         }
1560
1561       /* If the current range is multibyte and STRING contains
1562          eight-bit chars, arrange fastmap and setup char_ranges for
1563          the corresponding multibyte chars.  */
1564       if (multibyte && string_has_eight_bit)
1565         {
1566           unsigned char fastmap2[0400];
1567           int range_start_byte, range_start_char;
1568
1569           memcpy (fastmap + 0200, fastmap2 + 0200, 0200);
1570           memset (fastmap + 0200, 0, 0200);
1571           /* We are sure that this loop stops.  */
1572           for (i = 0200; ! fastmap2[i]; i++);
1573           c = BYTE8_TO_CHAR (i);
1574           fastmap[CHAR_LEADING_CODE (c)] = 1;
1575           range_start_byte = i;
1576           range_start_char = c;
1577           char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1578           for (i = 129; i < 0400; i++)
1579             {
1580               c = BYTE8_TO_CHAR (i);
1581               fastmap[CHAR_LEADING_CODE (c)] = 1;
1582               if (i - range_start_byte != c - range_start_char)
1583                 {
1584                   char_ranges[n_char_ranges++] = range_start_char;
1585                   char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1586                                                   + range_start_char);
1587                   range_start_byte = i;
1588                   range_start_char = c;
1589                 }
1590             }
1591           char_ranges[n_char_ranges++] = range_start_char;
1592           char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1593                                           + range_start_char);
1594         }
1595     }
1596   else                          /* STRING is multibyte */
1597     {
1598       char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1599
1600       while (i_byte < size_byte)
1601         {
1602           unsigned char leading_code;
1603
1604           leading_code = str[i_byte];
1605           c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1606           i_byte += len;
1607
1608           if (handle_iso_classes && c == '['
1609               && i_byte < size_byte
1610               && STRING_CHAR (str + i_byte) == ':')
1611             {
1612               const unsigned char *class_beg = str + i_byte + 1;
1613               const unsigned char *class_end = class_beg;
1614               const unsigned char *class_limit = str + size_byte - 2;
1615               /* Leave room for the null.        */
1616               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1617               re_wctype_t cc;
1618
1619               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1620                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1621
1622               while (class_end < class_limit
1623                      && *class_end >= 'a' && *class_end <= 'z')
1624                 class_end++;
1625
1626               if (class_end == class_beg
1627                   || *class_end != ':' || class_end[1] != ']')
1628                 goto not_a_class_name_multibyte;
1629
1630               memcpy (class_name, class_beg, class_end - class_beg);
1631               class_name[class_end - class_beg] = 0;
1632
1633               cc = re_wctype (class_name);
1634               if (cc == 0)
1635                 error ("Invalid ISO C character class");
1636
1637               iso_classes = Fcons (make_number (cc), iso_classes);
1638
1639               i_byte = class_end + 2 - str;
1640               continue;
1641             }
1642
1643         not_a_class_name_multibyte:
1644           if (c == '\\')
1645             {
1646               if (i_byte == size_byte)
1647                 break;
1648
1649               leading_code = str[i_byte];
1650               c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1651               i_byte += len;
1652             }
1653           /* Treat `-' as range character only if another character
1654              follows.  */
1655           if (i_byte + 1 < size_byte
1656               && str[i_byte] == '-')
1657             {
1658               unsigned int c2;
1659               unsigned char leading_code2;
1660
1661               /* Skip over the dash.  */
1662               i_byte++;
1663
1664               /* Get the end of the range.  */
1665               leading_code2 = str[i_byte];
1666               c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1667               i_byte += len;
1668
1669               if (c2 == '\\'
1670                   && i_byte < size_byte)
1671                 {
1672                   leading_code2 = str[i_byte];
1673                   c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1674                   i_byte += len;
1675                 }
1676
1677               if (c > c2)
1678                 continue;
1679               if (ASCII_CHAR_P (c))
1680                 {
1681                   while (c <= c2 && c < 0x80)
1682                     fastmap[c++] = 1;
1683                   leading_code = CHAR_LEADING_CODE (c);
1684                 }
1685               if (! ASCII_CHAR_P (c))
1686                 {
1687                   unsigned lim2 = leading_code2 + 1;
1688                   while (leading_code < lim2)
1689                     fastmap[leading_code++] = 1;
1690                   if (c <= c2)
1691                     {
1692                       char_ranges[n_char_ranges++] = c;
1693                       char_ranges[n_char_ranges++] = c2;
1694                     }
1695                 }
1696             }
1697           else
1698             {
1699               if (ASCII_CHAR_P (c))
1700                 fastmap[c] = 1;
1701               else
1702                 {
1703                   fastmap[leading_code] = 1;
1704                   char_ranges[n_char_ranges++] = c;
1705                   char_ranges[n_char_ranges++] = c;
1706                 }
1707             }
1708         }
1709
1710       /* If the current range is unibyte and STRING contains non-ASCII
1711          chars, arrange fastmap for the corresponding unibyte
1712          chars.  */
1713
1714       if (! multibyte && n_char_ranges > 0)
1715         {
1716           memset (fastmap + 0200, 0, 0200);
1717           for (i = 0; i < n_char_ranges; i += 2)
1718             {
1719               int c1 = char_ranges[i];
1720               unsigned lim2 = char_ranges[i + 1] + 1;
1721
1722               for (; c1 < lim2; c1++)
1723                 {
1724                   int b = CHAR_TO_BYTE_SAFE (c1);
1725                   if (b >= 0)
1726                     fastmap[b] = 1;
1727                 }
1728             }
1729         }
1730     }
1731
1732   /* If ^ was the first character, complement the fastmap.  */
1733   if (negate)
1734     {
1735       if (! multibyte)
1736         for (i = 0; i < sizeof fastmap; i++)
1737           fastmap[i] ^= 1;
1738       else
1739         {
1740           for (i = 0; i < 0200; i++)
1741             fastmap[i] ^= 1;
1742           /* All non-ASCII chars possibly match.  */
1743           for (; i < sizeof fastmap; i++)
1744             fastmap[i] = 1;
1745         }
1746     }
1747
1748   {
1749     EMACS_INT start_point = PT;
1750     EMACS_INT pos = PT;
1751     EMACS_INT pos_byte = PT_BYTE;
1752     unsigned char *p = PT_ADDR, *endp, *stop;
1753
1754     if (forwardp)
1755       {
1756         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1757         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1758       }
1759     else
1760       {
1761         endp = CHAR_POS_ADDR (XINT (lim));
1762         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1763       }
1764
1765     immediate_quit = 1;
1766     /* This code may look up syntax tables using macros that rely on the
1767        gl_state object.  To make sure this object is not out of date,
1768        let's initialize it manually.
1769        We ignore syntax-table text-properties for now, since that's
1770        what we've done in the past.  */
1771     SETUP_BUFFER_SYNTAX_TABLE ();
1772     if (forwardp)
1773       {
1774         if (multibyte)
1775           while (1)
1776             {
1777               int nbytes;
1778
1779               if (p >= stop)
1780                 {
1781                   if (p >= endp)
1782                     break;
1783                   p = GAP_END_ADDR;
1784                   stop = endp;
1785                 }
1786               c = STRING_CHAR_AND_LENGTH (p, nbytes);
1787               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1788                 {
1789                   if (negate)
1790                     break;
1791                   else
1792                     goto fwd_ok;
1793                 }
1794
1795               if (! fastmap[*p])
1796                 break;
1797               if (! ASCII_CHAR_P (c))
1798                 {
1799                   /* As we are looking at a multibyte character, we
1800                      must look up the character in the table
1801                      CHAR_RANGES.  If there's no data in the table,
1802                      that character is not what we want to skip.  */
1803
1804                   /* The following code do the right thing even if
1805                      n_char_ranges is zero (i.e. no data in
1806                      CHAR_RANGES).  */
1807                   for (i = 0; i < n_char_ranges; i += 2)
1808                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1809                       break;
1810                   if (!(negate ^ (i < n_char_ranges)))
1811                     break;
1812                 }
1813             fwd_ok:
1814               p += nbytes, pos++, pos_byte += nbytes;
1815             }
1816         else
1817           while (1)
1818             {
1819               if (p >= stop)
1820                 {
1821                   if (p >= endp)
1822                     break;
1823                   p = GAP_END_ADDR;
1824                   stop = endp;
1825                 }
1826
1827               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1828                 {
1829                   if (negate)
1830                     break;
1831                   else
1832                     goto fwd_unibyte_ok;
1833                 }
1834
1835               if (!fastmap[*p])
1836                 break;
1837             fwd_unibyte_ok:
1838               p++, pos++, pos_byte++;
1839             }
1840       }
1841     else
1842       {
1843         if (multibyte)
1844           while (1)
1845             {
1846               unsigned char *prev_p;
1847
1848               if (p <= stop)
1849                 {
1850                   if (p <= endp)
1851                     break;
1852                   p = GPT_ADDR;
1853                   stop = endp;
1854                 }
1855               prev_p = p;
1856               while (--p >= stop && ! CHAR_HEAD_P (*p));
1857               c = STRING_CHAR (p);
1858
1859               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1860                 {
1861                   if (negate)
1862                     break;
1863                   else
1864                     goto back_ok;
1865                 }
1866
1867               if (! fastmap[*p])
1868                 break;
1869               if (! ASCII_CHAR_P (c))
1870                 {
1871                   /* See the comment in the previous similar code.  */
1872                   for (i = 0; i < n_char_ranges; i += 2)
1873                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1874                       break;
1875                   if (!(negate ^ (i < n_char_ranges)))
1876                     break;
1877                 }
1878             back_ok:
1879               pos--, pos_byte -= prev_p - p;
1880             }
1881         else
1882           while (1)
1883             {
1884               if (p <= stop)
1885                 {
1886                   if (p <= endp)
1887                     break;
1888                   p = GPT_ADDR;
1889                   stop = endp;
1890                 }
1891
1892               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1893                 {
1894                   if (negate)
1895                     break;
1896                   else
1897                     goto back_unibyte_ok;
1898                 }
1899
1900               if (!fastmap[p[-1]])
1901                 break;
1902             back_unibyte_ok:
1903               p--, pos--, pos_byte--;
1904             }
1905       }
1906
1907     SET_PT_BOTH (pos, pos_byte);
1908     immediate_quit = 0;
1909
1910     return make_number (PT - start_point);
1911   }
1912 }
1913
1914
1915 static Lisp_Object
1916 skip_syntaxes (int forwardp, Lisp_Object string, Lisp_Object lim)
1917 {
1918   register unsigned int c;
1919   unsigned char fastmap[0400];
1920   int negate = 0;
1921   register EMACS_INT i, i_byte;
1922   int multibyte;
1923   EMACS_INT size_byte;
1924   unsigned char *str;
1925
1926   CHECK_STRING (string);
1927
1928   if (NILP (lim))
1929     XSETINT (lim, forwardp ? ZV : BEGV);
1930   else
1931     CHECK_NUMBER_COERCE_MARKER (lim);
1932
1933   /* In any case, don't allow scan outside bounds of buffer.  */
1934   if (XINT (lim) > ZV)
1935     XSETFASTINT (lim, ZV);
1936   if (XINT (lim) < BEGV)
1937     XSETFASTINT (lim, BEGV);
1938
1939   if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1940     return make_number (0);
1941
1942   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1943                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1944
1945   memset (fastmap, 0, sizeof fastmap);
1946
1947   if (SBYTES (string) > SCHARS (string))
1948     /* As this is very rare case (syntax spec is ASCII only), don't
1949        consider efficiency.  */
1950     string = string_make_unibyte (string);
1951
1952   str = SDATA (string);
1953   size_byte = SBYTES (string);
1954
1955   i_byte = 0;
1956   if (i_byte < size_byte
1957       && SREF (string, 0) == '^')
1958     {
1959       negate = 1; i_byte++;
1960     }
1961
1962   /* Find the syntaxes specified and set their elements of fastmap.  */
1963
1964   while (i_byte < size_byte)
1965     {
1966       c = str[i_byte++];
1967       fastmap[syntax_spec_code[c]] = 1;
1968     }
1969
1970   /* If ^ was the first character, complement the fastmap.  */
1971   if (negate)
1972     for (i = 0; i < sizeof fastmap; i++)
1973       fastmap[i] ^= 1;
1974
1975   {
1976     EMACS_INT start_point = PT;
1977     EMACS_INT pos = PT;
1978     EMACS_INT pos_byte = PT_BYTE;
1979     unsigned char *p = PT_ADDR, *endp, *stop;
1980
1981     if (forwardp)
1982       {
1983         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1984         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1985       }
1986     else
1987       {
1988         endp = CHAR_POS_ADDR (XINT (lim));
1989         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1990       }
1991
1992     immediate_quit = 1;
1993     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1994     if (forwardp)
1995       {
1996         if (multibyte)
1997           {
1998             while (1)
1999               {
2000                 int nbytes;
2001
2002                 if (p >= stop)
2003                   {
2004                     if (p >= endp)
2005                       break;
2006                     p = GAP_END_ADDR;
2007                     stop = endp;
2008                   }
2009                 c = STRING_CHAR_AND_LENGTH (p, nbytes);
2010                 if (! fastmap[(int) SYNTAX (c)])
2011                   break;
2012                 p += nbytes, pos++, pos_byte += nbytes;
2013                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2014               }
2015           }
2016         else
2017           {
2018             while (1)
2019               {
2020                 if (p >= stop)
2021                   {
2022                     if (p >= endp)
2023                       break;
2024                     p = GAP_END_ADDR;
2025                     stop = endp;
2026                   }
2027                 if (! fastmap[(int) SYNTAX (*p)])
2028                   break;
2029                 p++, pos++, pos_byte++;
2030                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2031               }
2032           }
2033       }
2034     else
2035       {
2036         if (multibyte)
2037           {
2038             while (1)
2039               {
2040                 unsigned char *prev_p;
2041
2042                 if (p <= stop)
2043                   {
2044                     if (p <= endp)
2045                       break;
2046                     p = GPT_ADDR;
2047                     stop = endp;
2048                   }
2049                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2050                 prev_p = p;
2051                 while (--p >= stop && ! CHAR_HEAD_P (*p));
2052                 c = STRING_CHAR (p);
2053                 if (! fastmap[(int) SYNTAX (c)])
2054                   break;
2055                 pos--, pos_byte -= prev_p - p;
2056               }
2057           }
2058         else
2059           {
2060             while (1)
2061               {
2062                 if (p <= stop)
2063                   {
2064                     if (p <= endp)
2065                       break;
2066                     p = GPT_ADDR;
2067                     stop = endp;
2068                   }
2069                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2070                 if (! fastmap[(int) SYNTAX (p[-1])])
2071                   break;
2072                 p--, pos--, pos_byte--;
2073               }
2074           }
2075       }
2076
2077     SET_PT_BOTH (pos, pos_byte);
2078     immediate_quit = 0;
2079
2080     return make_number (PT - start_point);
2081   }
2082 }
2083
2084 /* Return 1 if character C belongs to one of the ISO classes
2085    in the list ISO_CLASSES.  Each class is represented by an
2086    integer which is its type according to re_wctype.  */
2087
2088 static int
2089 in_classes (int c, Lisp_Object iso_classes)
2090 {
2091   int fits_class = 0;
2092
2093   while (CONSP (iso_classes))
2094     {
2095       Lisp_Object elt;
2096       elt = XCAR (iso_classes);
2097       iso_classes = XCDR (iso_classes);
2098
2099       if (re_iswctype (c, XFASTINT (elt)))
2100         fits_class = 1;
2101     }
2102
2103   return fits_class;
2104 }
2105 \f
2106 /* Jump over a comment, assuming we are at the beginning of one.
2107    FROM is the current position.
2108    FROM_BYTE is the bytepos corresponding to FROM.
2109    Do not move past STOP (a charpos).
2110    The comment over which we have to jump is of style STYLE
2111      (either SYNTAX_FLAGS_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2112    NESTING should be positive to indicate the nesting at the beginning
2113      for nested comments and should be zero or negative else.
2114      ST_COMMENT_STYLE cannot be nested.
2115    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2116      (or 0 If the search cannot start in the middle of a two-character).
2117
2118    If successful, return 1 and store the charpos of the comment's end
2119    into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2120    Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2121    corresponding bytepos into *BYTEPOS_PTR and the current nesting
2122    (as defined for state.incomment) in *INCOMMENT_PTR.
2123
2124    The comment end is the last character of the comment rather than the
2125      character just after the comment.
2126
2127    Global syntax data is assumed to initially be valid for FROM and
2128    remains valid for forward search starting at the returned position. */
2129
2130 static int
2131 forw_comment (EMACS_INT from, EMACS_INT from_byte, EMACS_INT stop,
2132               int nesting, int style, int prev_syntax,
2133               EMACS_INT *charpos_ptr, EMACS_INT *bytepos_ptr,
2134               int *incomment_ptr)
2135 {
2136   register int c, c1;
2137   register enum syntaxcode code;
2138   register int syntax, other_syntax;
2139
2140   if (nesting <= 0) nesting = -1;
2141
2142   /* Enter the loop in the middle so that we find
2143      a 2-char comment ender if we start in the middle of it.  */
2144   syntax = prev_syntax;
2145   if (syntax != 0) goto forw_incomment;
2146
2147   while (1)
2148     {
2149       if (from == stop)
2150         {
2151           *incomment_ptr = nesting;
2152           *charpos_ptr = from;
2153           *bytepos_ptr = from_byte;
2154           return 0;
2155         }
2156       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2157       syntax = SYNTAX_WITH_FLAGS (c);
2158       code = syntax & 0xff;
2159       if (code == Sendcomment
2160           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2161           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2162               (nesting > 0 && --nesting == 0) : nesting < 0))
2163         /* we have encountered a comment end of the same style
2164            as the comment sequence which began this comment
2165            section */
2166         break;
2167       if (code == Scomment_fence
2168           && style == ST_COMMENT_STYLE)
2169         /* we have encountered a comment end of the same style
2170            as the comment sequence which began this comment
2171            section.  */
2172         break;
2173       if (nesting > 0
2174           && code == Scomment
2175           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2176           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2177         /* we have encountered a nested comment of the same style
2178            as the comment sequence which began this comment section */
2179         nesting++;
2180       INC_BOTH (from, from_byte);
2181       UPDATE_SYNTAX_TABLE_FORWARD (from);
2182
2183     forw_incomment:
2184       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2185           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2186               other_syntax = SYNTAX_WITH_FLAGS (c1),
2187               SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2188           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2189           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2190                SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2191               ? nesting > 0 : nesting < 0))
2192         {
2193           if (--nesting <= 0)
2194             /* we have encountered a comment end of the same style
2195                as the comment sequence which began this comment
2196                section */
2197             break;
2198           else
2199             {
2200               INC_BOTH (from, from_byte);
2201               UPDATE_SYNTAX_TABLE_FORWARD (from);
2202             }
2203         }
2204       if (nesting > 0
2205           && from < stop
2206           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2207           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2208               other_syntax = SYNTAX_WITH_FLAGS (c1),
2209               SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2210               && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2211           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2212               SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2213         /* we have encountered a nested comment of the same style
2214            as the comment sequence which began this comment
2215            section */
2216         {
2217           INC_BOTH (from, from_byte);
2218           UPDATE_SYNTAX_TABLE_FORWARD (from);
2219           nesting++;
2220         }
2221     }
2222   *charpos_ptr = from;
2223   *bytepos_ptr = from_byte;
2224   return 1;
2225 }
2226
2227 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2228        doc: /*
2229 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2230 Stop scanning if we find something other than a comment or whitespace.
2231 Set point to where scanning stops.
2232 If COUNT comments are found as expected, with nothing except whitespace
2233 between them, return t; otherwise return nil.  */)
2234   (Lisp_Object count)
2235 {
2236   register EMACS_INT from;
2237   EMACS_INT from_byte;
2238   register EMACS_INT stop;
2239   register int c, c1;
2240   register enum syntaxcode code;
2241   int comstyle = 0;         /* style of comment encountered */
2242   int comnested = 0;        /* whether the comment is nestable or not */
2243   int found;
2244   EMACS_INT count1;
2245   EMACS_INT out_charpos, out_bytepos;
2246   int dummy;
2247
2248   CHECK_NUMBER (count);
2249   count1 = XINT (count);
2250   stop = count1 > 0 ? ZV : BEGV;
2251
2252   immediate_quit = 1;
2253   QUIT;
2254
2255   from = PT;
2256   from_byte = PT_BYTE;
2257
2258   SETUP_SYNTAX_TABLE (from, count1);
2259   while (count1 > 0)
2260     {
2261       do
2262         {
2263           int comstart_first, syntax, other_syntax;
2264
2265           if (from == stop)
2266             {
2267               SET_PT_BOTH (from, from_byte);
2268               immediate_quit = 0;
2269               return Qnil;
2270             }
2271           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2272           syntax = SYNTAX_WITH_FLAGS (c);
2273           code = SYNTAX (c);
2274           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2275           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2276           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2277           INC_BOTH (from, from_byte);
2278           UPDATE_SYNTAX_TABLE_FORWARD (from);
2279           if (from < stop && comstart_first
2280               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2281                   other_syntax = SYNTAX_WITH_FLAGS (c1),
2282                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2283             {
2284               /* We have encountered a comment start sequence and we
2285                  are ignoring all text inside comments.  We must record
2286                  the comment style this sequence begins so that later,
2287                  only a comment end of the same style actually ends
2288                  the comment section.  */
2289               code = Scomment;
2290               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2291               comnested
2292                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2293               INC_BOTH (from, from_byte);
2294               UPDATE_SYNTAX_TABLE_FORWARD (from);
2295             }
2296         }
2297       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2298
2299       if (code == Scomment_fence)
2300         comstyle = ST_COMMENT_STYLE;
2301       else if (code != Scomment)
2302         {
2303           immediate_quit = 0;
2304           DEC_BOTH (from, from_byte);
2305           SET_PT_BOTH (from, from_byte);
2306           return Qnil;
2307         }
2308       /* We're at the start of a comment.  */
2309       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2310                             &out_charpos, &out_bytepos, &dummy);
2311       from = out_charpos; from_byte = out_bytepos;
2312       if (!found)
2313         {
2314           immediate_quit = 0;
2315           SET_PT_BOTH (from, from_byte);
2316           return Qnil;
2317         }
2318       INC_BOTH (from, from_byte);
2319       UPDATE_SYNTAX_TABLE_FORWARD (from);
2320       /* We have skipped one comment.  */
2321       count1--;
2322     }
2323
2324   while (count1 < 0)
2325     {
2326       while (1)
2327         {
2328           int quoted, syntax;
2329
2330           if (from <= stop)
2331             {
2332               SET_PT_BOTH (BEGV, BEGV_BYTE);
2333               immediate_quit = 0;
2334               return Qnil;
2335             }
2336
2337           DEC_BOTH (from, from_byte);
2338           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2339           quoted = char_quoted (from, from_byte);
2340           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2341           syntax = SYNTAX_WITH_FLAGS (c);
2342           code = SYNTAX (c);
2343           comstyle = 0;
2344           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2345           if (code == Sendcomment)
2346             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2347           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2348               && prev_char_comend_first (from, from_byte)
2349               && !char_quoted (from - 1, dec_bytepos (from_byte)))
2350             {
2351               int other_syntax;
2352               /* We must record the comment style encountered so that
2353                  later, we can match only the proper comment begin
2354                  sequence of the same style.  */
2355               DEC_BOTH (from, from_byte);
2356               code = Sendcomment;
2357               /* Calling char_quoted, above, set up global syntax position
2358                  at the new value of FROM.  */
2359               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2360               other_syntax = SYNTAX_WITH_FLAGS (c1);
2361               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2362               comnested
2363                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2364             }
2365
2366           if (code == Scomment_fence)
2367             {
2368               /* Skip until first preceding unquoted comment_fence.  */
2369               int fence_found = 0;
2370               EMACS_INT ini = from, ini_byte = from_byte;
2371
2372               while (1)
2373                 {
2374                   DEC_BOTH (from, from_byte);
2375                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2376                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2377                   if (SYNTAX (c) == Scomment_fence
2378                       && !char_quoted (from, from_byte))
2379                     {
2380                       fence_found = 1;
2381                       break;
2382                     }
2383                   else if (from == stop)
2384                     break;
2385                 }
2386               if (fence_found == 0)
2387                 {
2388                   from = ini;           /* Set point to ini + 1.  */
2389                   from_byte = ini_byte;
2390                   goto leave;
2391                 }
2392               else
2393                 /* We have skipped one comment.  */
2394                 break;
2395             }
2396           else if (code == Sendcomment)
2397             {
2398               found = back_comment (from, from_byte, stop, comnested, comstyle,
2399                                     &out_charpos, &out_bytepos);
2400               if (found == -1)
2401                 {
2402                   if (c == '\n')
2403                     /* This end-of-line is not an end-of-comment.
2404                        Treat it like a whitespace.
2405                        CC-mode (and maybe others) relies on this behavior.  */
2406                     ;
2407                   else
2408                     {
2409                       /* Failure: we should go back to the end of this
2410                          not-quite-endcomment.  */
2411                       if (SYNTAX (c) != code)
2412                         /* It was a two-char Sendcomment.  */
2413                         INC_BOTH (from, from_byte);
2414                       goto leave;
2415                     }
2416                 }
2417               else
2418                 {
2419                   /* We have skipped one comment.  */
2420                   from = out_charpos, from_byte = out_bytepos;
2421                   break;
2422                 }
2423             }
2424           else if (code != Swhitespace || quoted)
2425             {
2426             leave:
2427               immediate_quit = 0;
2428               INC_BOTH (from, from_byte);
2429               SET_PT_BOTH (from, from_byte);
2430               return Qnil;
2431             }
2432         }
2433
2434       count1++;
2435     }
2436
2437   SET_PT_BOTH (from, from_byte);
2438   immediate_quit = 0;
2439   return Qt;
2440 }
2441 \f
/* Syntax code to use for character C: Ssymbol when C is a non-ASCII
   character and `multibyte_symbol_p' is nonzero, otherwise C's own
   syntax code.  A variable named `multibyte_symbol_p' must be in
   scope wherever this macro is expanded.  */

#define SYNTAX_WITH_MULTIBYTE_CHECK(c)          \
  ((!ASCII_CHAR_P (c) && multibyte_symbol_p)    \
   ? Ssymbol : SYNTAX (c))
2448
2449 static Lisp_Object
2450 scan_lists (register EMACS_INT from, EMACS_INT count, EMACS_INT depth, int sexpflag)
2451 {
2452   Lisp_Object val;
2453   register EMACS_INT stop = count > 0 ? ZV : BEGV;
2454   register int c, c1;
2455   int stringterm;
2456   int quoted;
2457   int mathexit = 0;
2458   register enum syntaxcode code, temp_code;
2459   int min_depth = depth;    /* Err out if depth gets less than this.  */
2460   int comstyle = 0;         /* style of comment encountered */
2461   int comnested = 0;        /* whether the comment is nestable or not */
2462   EMACS_INT temp_pos;
2463   EMACS_INT last_good = from;
2464   int found;
2465   EMACS_INT from_byte;
2466   EMACS_INT out_bytepos, out_charpos;
2467   int temp, dummy;
2468   int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2469
2470   if (depth > 0) min_depth = 0;
2471
2472   if (from > ZV) from = ZV;
2473   if (from < BEGV) from = BEGV;
2474
2475   from_byte = CHAR_TO_BYTE (from);
2476
2477   immediate_quit = 1;
2478   QUIT;
2479
2480   SETUP_SYNTAX_TABLE (from, count);
2481   while (count > 0)
2482     {
2483       while (from < stop)
2484         {
2485           int comstart_first, prefix, syntax, other_syntax;
2486           UPDATE_SYNTAX_TABLE_FORWARD (from);
2487           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2488           syntax = SYNTAX_WITH_FLAGS (c);
2489           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2490           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2491           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2492           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2493           prefix = SYNTAX_FLAGS_PREFIX (syntax);
2494           if (depth == min_depth)
2495             last_good = from;
2496           INC_BOTH (from, from_byte);
2497           UPDATE_SYNTAX_TABLE_FORWARD (from);
2498           if (from < stop && comstart_first
2499               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2500                   other_syntax = SYNTAX_WITH_FLAGS (c),
2501                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2502               && parse_sexp_ignore_comments)
2503             {
2504               /* we have encountered a comment start sequence and we
2505                  are ignoring all text inside comments.  We must record
2506                  the comment style this sequence begins so that later,
2507                  only a comment end of the same style actually ends
2508                  the comment section */
2509               code = Scomment;
2510               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2511               comnested
2512                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2513               INC_BOTH (from, from_byte);
2514               UPDATE_SYNTAX_TABLE_FORWARD (from);
2515             }
2516
2517           if (prefix)
2518             continue;
2519
2520           switch (SWITCH_ENUM_CAST (code))
2521             {
2522             case Sescape:
2523             case Scharquote:
2524               if (from == stop)
2525                 goto lose;
2526               INC_BOTH (from, from_byte);
2527               /* treat following character as a word constituent */
2528             case Sword:
2529             case Ssymbol:
2530               if (depth || !sexpflag) break;
2531               /* This word counts as a sexp; return at end of it.  */
2532               while (from < stop)
2533                 {
2534                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2535
2536                   /* Some compilers can't handle this inside the switch.  */
2537                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2538                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2539                   switch (temp)
2540                     {
2541                     case Scharquote:
2542                     case Sescape:
2543                       INC_BOTH (from, from_byte);
2544                       if (from == stop)
2545                         goto lose;
2546                       break;
2547                     case Sword:
2548                     case Ssymbol:
2549                     case Squote:
2550                       break;
2551                     default:
2552                       goto done;
2553                     }
2554                   INC_BOTH (from, from_byte);
2555                 }
2556               goto done;
2557
2558             case Scomment_fence:
2559               comstyle = ST_COMMENT_STYLE;
2560               /* FALLTHROUGH */
2561             case Scomment:
2562               if (!parse_sexp_ignore_comments) break;
2563               UPDATE_SYNTAX_TABLE_FORWARD (from);
2564               found = forw_comment (from, from_byte, stop,
2565                                     comnested, comstyle, 0,
2566                                     &out_charpos, &out_bytepos, &dummy);
2567               from = out_charpos, from_byte = out_bytepos;
2568               if (!found)
2569                 {
2570                   if (depth == 0)
2571                     goto done;
2572                   goto lose;
2573                 }
2574               INC_BOTH (from, from_byte);
2575               UPDATE_SYNTAX_TABLE_FORWARD (from);
2576               break;
2577
2578             case Smath:
2579               if (!sexpflag)
2580                 break;
2581               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2582                 {
2583                   INC_BOTH (from, from_byte);
2584                 }
2585               if (mathexit)
2586                 {
2587                   mathexit = 0;
2588                   goto close1;
2589                 }
2590               mathexit = 1;
2591
2592             case Sopen:
2593               if (!++depth) goto done;
2594               break;
2595
2596             case Sclose:
2597             close1:
2598               if (!--depth) goto done;
2599               if (depth < min_depth)
2600                 xsignal3 (Qscan_error,
2601                           build_string ("Containing expression ends prematurely"),
2602                           make_number (last_good), make_number (from));
2603               break;
2604
2605             case Sstring:
2606             case Sstring_fence:
2607               temp_pos = dec_bytepos (from_byte);
2608               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2609               while (1)
2610                 {
2611                   if (from >= stop)
2612                     goto lose;
2613                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2614                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2615                   if (code == Sstring
2616                       ? (c == stringterm
2617                          && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2618                       : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2619                     break;
2620
2621                   /* Some compilers can't handle this inside the switch.  */
2622                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2623                   switch (temp)
2624                     {
2625                     case Scharquote:
2626                     case Sescape:
2627                       INC_BOTH (from, from_byte);
2628                     }
2629                   INC_BOTH (from, from_byte);
2630                 }
2631               INC_BOTH (from, from_byte);
2632               if (!depth && sexpflag) goto done;
2633               break;
2634             default:
2635               /* Ignore whitespace, punctuation, quote, endcomment.  */
2636               break;
2637             }
2638         }
2639
2640       /* Reached end of buffer.  Error if within object, return nil if between */
2641       if (depth)
2642         goto lose;
2643
2644       immediate_quit = 0;
2645       return Qnil;
2646
2647       /* End of object reached */
2648     done:
2649       count--;
2650     }
2651
2652   while (count < 0)
2653     {
2654       while (from > stop)
2655         {
2656           int syntax;
2657           DEC_BOTH (from, from_byte);
2658           UPDATE_SYNTAX_TABLE_BACKWARD (from);
2659           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2660           syntax= SYNTAX_WITH_FLAGS (c);
2661           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2662           if (depth == min_depth)
2663             last_good = from;
2664           comstyle = 0;
2665           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2666           if (code == Sendcomment)
2667             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2668           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2669               && prev_char_comend_first (from, from_byte)
2670               && parse_sexp_ignore_comments)
2671             {
2672               /* We must record the comment style encountered so that
2673                  later, we can match only the proper comment begin
2674                  sequence of the same style.  */
2675               int c2, other_syntax;
2676               DEC_BOTH (from, from_byte);
2677               UPDATE_SYNTAX_TABLE_BACKWARD (from);
2678               code = Sendcomment;
2679               c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2680               other_syntax = SYNTAX_WITH_FLAGS (c2);
2681               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2682               comnested
2683                 = comnested || SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2684             }
2685
2686           /* Quoting turns anything except a comment-ender
2687              into a word character.  Note that this cannot be true
2688              if we decremented FROM in the if-statement above.  */
2689           if (code != Sendcomment && char_quoted (from, from_byte))
2690             {
2691               DEC_BOTH (from, from_byte);
2692               code = Sword;
2693             }
2694           else if (SYNTAX_FLAGS_PREFIX (syntax))
2695             continue;
2696
2697           switch (SWITCH_ENUM_CAST (code))
2698             {
2699             case Sword:
2700             case Ssymbol:
2701             case Sescape:
2702             case Scharquote:
2703               if (depth || !sexpflag) break;
2704               /* This word counts as a sexp; count object finished
2705                  after passing it.  */
2706               while (from > stop)
2707                 {
2708                   temp_pos = from_byte;
2709                   if (! NILP (BVAR (current_buffer, enable_multibyte_characters)))
2710                     DEC_POS (temp_pos);
2711                   else
2712                     temp_pos--;
2713                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2714                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2715                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2716                   /* Don't allow comment-end to be quoted.  */
2717                   if (temp_code == Sendcomment)
2718                     goto done2;
2719                   quoted = char_quoted (from - 1, temp_pos);
2720                   if (quoted)
2721                     {
2722                       DEC_BOTH (from, from_byte);
2723                       temp_pos = dec_bytepos (temp_pos);
2724                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2725                     }
2726                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2727                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2728                   if (! (quoted || temp_code == Sword
2729                          || temp_code == Ssymbol
2730                          || temp_code == Squote))
2731                     goto done2;
2732                   DEC_BOTH (from, from_byte);
2733                 }
2734               goto done2;
2735
2736             case Smath:
2737               if (!sexpflag)
2738                 break;
2739               temp_pos = dec_bytepos (from_byte);
2740               UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2741               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2742                 DEC_BOTH (from, from_byte);
2743               if (mathexit)
2744                 {
2745                   mathexit = 0;
2746                   goto open2;
2747                 }
2748               mathexit = 1;
2749
2750             case Sclose:
2751               if (!++depth) goto done2;
2752               break;
2753
2754             case Sopen:
2755             open2:
2756               if (!--depth) goto done2;
2757               if (depth < min_depth)
2758                 xsignal3 (Qscan_error,
2759                           build_string ("Containing expression ends prematurely"),
2760                           make_number (last_good), make_number (from));
2761               break;
2762
2763             case Sendcomment:
2764               if (!parse_sexp_ignore_comments)
2765                 break;
2766               found = back_comment (from, from_byte, stop, comnested, comstyle,
2767                                     &out_charpos, &out_bytepos);
2768               /* FIXME:  if found == -1, then it really wasn't a comment-end.
2769                  For single-char Sendcomment, we can't do much about it apart
2770                  from skipping the char.
2771                  For 2-char endcomments, we could try again, taking both
2772                  chars as separate entities, but it's a lot of trouble
2773                  for very little gain, so we don't bother either.  -sm */
2774               if (found != -1)
2775                 from = out_charpos, from_byte = out_bytepos;
2776               break;
2777
2778             case Scomment_fence:
2779             case Sstring_fence:
2780               while (1)
2781                 {
2782                   if (from == stop)
2783                     goto lose;
2784                   DEC_BOTH (from, from_byte);
2785                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2786                   if (!char_quoted (from, from_byte)
2787                       && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2788                           SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2789                     break;
2790                 }
2791               if (code == Sstring_fence && !depth && sexpflag) goto done2;
2792               break;
2793
2794             case Sstring:
2795               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2796               while (1)
2797                 {
2798                   if (from == stop)
2799                     goto lose;
2800                   DEC_BOTH (from, from_byte);
2801                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2802                   if (!char_quoted (from, from_byte)
2803                       && (stringterm
2804                           == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2805                       && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2806                     break;
2807                 }
2808               if (!depth && sexpflag) goto done2;
2809               break;
2810             default:
2811               /* Ignore whitespace, punctuation, quote, endcomment.  */
2812               break;
2813             }
2814         }
2815
2816       /* Reached start of buffer.  Error if within object, return nil if between */
2817       if (depth)
2818         goto lose;
2819
2820       immediate_quit = 0;
2821       return Qnil;
2822
2823     done2:
2824       count++;
2825     }
2826
2827
2828   immediate_quit = 0;
2829   XSETFASTINT (val, from);
2830   return val;
2831
2832  lose:
2833   xsignal3 (Qscan_error,
2834             build_string ("Unbalanced parentheses"),
2835             make_number (last_good), make_number (from));
2836 }
2837
2838 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2839        doc: /* Scan from character number FROM by COUNT lists.
2840 Returns the character number of the position thus found.
2841
2842 If DEPTH is nonzero, paren depth begins counting from that value,
2843 only places where the depth in parentheses becomes zero
2844 are candidates for stopping; COUNT such places are counted.
2845 Thus, a positive value for DEPTH means go out levels.
2846
2847 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2848
2849 If the beginning or end of (the accessible part of) the buffer is reached
2850 and the depth is wrong, an error is signaled.
2851 If the depth is right but the count is not used up, nil is returned.  */)
2852   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
2853 {
2854   CHECK_NUMBER (from);
2855   CHECK_NUMBER (count);
2856   CHECK_NUMBER (depth);
2857
2858   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2859 }
2860
2861 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2862        doc: /* Scan from character number FROM by COUNT balanced expressions.
2863 If COUNT is negative, scan backwards.
2864 Returns the character number of the position thus found.
2865
2866 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2867
2868 If the beginning or end of (the accessible part of) the buffer is reached
2869 in the middle of a parenthetical grouping, an error is signaled.
2870 If the beginning or end is reached between groupings
2871 but before count is used up, nil is returned.  */)
2872   (Lisp_Object from, Lisp_Object count)
2873 {
2874   CHECK_NUMBER (from);
2875   CHECK_NUMBER (count);
2876
2877   return scan_lists (XINT (from), XINT (count), 0, 1);
2878 }
2879
2880 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2881        0, 0, 0,
2882        doc: /* Move point backward over any number of chars with prefix syntax.
2883 This includes chars with "quote" or "prefix" syntax (' or p).  */)
2884   (void)
2885 {
2886   EMACS_INT beg = BEGV;
2887   EMACS_INT opoint = PT;
2888   EMACS_INT opoint_byte = PT_BYTE;
2889   EMACS_INT pos = PT;
2890   EMACS_INT pos_byte = PT_BYTE;
2891   int c;
2892
2893   if (pos <= beg)
2894     {
2895       SET_PT_BOTH (opoint, opoint_byte);
2896
2897       return Qnil;
2898     }
2899
2900   SETUP_SYNTAX_TABLE (pos, -1);
2901
2902   DEC_BOTH (pos, pos_byte);
2903
2904   while (!char_quoted (pos, pos_byte)
2905          /* Previous statement updates syntax table.  */
2906          && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2907              || SYNTAX_PREFIX (c)))
2908     {
2909       opoint = pos;
2910       opoint_byte = pos_byte;
2911
2912       if (pos + 1 > beg)
2913         DEC_BOTH (pos, pos_byte);
2914     }
2915
2916   SET_PT_BOTH (opoint, opoint_byte);
2917
2918   return Qnil;
2919 }
2920 \f
2921 /* Parse forward from FROM / FROM_BYTE to END,
2922    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2923    and return a description of the state of the parse at END.
2924    If STOPBEFORE is nonzero, stop at the start of an atom.
2925    If COMMENTSTOP is 1, stop at the start of a comment.
2926    If COMMENTSTOP is -1, stop at the start or end of a comment,
2927    after the beginning of a string, or after the end of a string.  */
2928
2929 static void
2930 scan_sexps_forward (struct lisp_parse_state *stateptr,
2931                     EMACS_INT from, EMACS_INT from_byte, EMACS_INT end,
2932                     int targetdepth, int stopbefore,
2933                     Lisp_Object oldstate, int commentstop)
2934 {
2935   struct lisp_parse_state state;
2936
2937   register enum syntaxcode code;
2938   int c1;
2939   int comnested;
2940   struct level { int last, prev; };
2941   struct level levelstart[100];
2942   register struct level *curlevel = levelstart;
2943   struct level *endlevel = levelstart + 100;
2944   register int depth;   /* Paren depth of current scanning location.
2945                            level - levelstart equals this except
2946                            when the depth becomes negative.  */
2947   int mindepth;         /* Lowest DEPTH value seen.  */
2948   int start_quoted = 0;         /* Nonzero means starting after a char quote */
2949   Lisp_Object tem;
2950   EMACS_INT prev_from;          /* Keep one character before FROM.  */
2951   EMACS_INT prev_from_byte;
2952   int prev_from_syntax;
2953   int boundary_stop = commentstop == -1;
2954   int nofence;
2955   int found;
2956   EMACS_INT out_bytepos, out_charpos;
2957   int temp;
2958
2959   prev_from = from;
2960   prev_from_byte = from_byte;
2961   if (from != BEGV)
2962     DEC_BOTH (prev_from, prev_from_byte);
2963
2964   /* Use this macro instead of `from++'.  */
2965 #define INC_FROM                                \
2966 do { prev_from = from;                          \
2967      prev_from_byte = from_byte;                \
2968      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
2969      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2970      INC_BOTH (from, from_byte);                \
2971      if (from < end)                            \
2972        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
2973   } while (0)
2974
2975   immediate_quit = 1;
2976   QUIT;
2977
2978   if (NILP (oldstate))
2979     {
2980       depth = 0;
2981       state.instring = -1;
2982       state.incomment = 0;
2983       state.comstyle = 0;       /* comment style a by default.  */
2984       state.comstr_start = -1;  /* no comment/string seen.  */
2985     }
2986   else
2987     {
2988       tem = Fcar (oldstate);
2989       if (!NILP (tem))
2990         depth = XINT (tem);
2991       else
2992         depth = 0;
2993
2994       oldstate = Fcdr (oldstate);
2995       oldstate = Fcdr (oldstate);
2996       oldstate = Fcdr (oldstate);
2997       tem = Fcar (oldstate);
2998       /* Check whether we are inside string_fence-style string: */
2999       state.instring = (!NILP (tem)
3000                         ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
3001                         : -1);
3002
3003       oldstate = Fcdr (oldstate);
3004       tem = Fcar (oldstate);
3005       state.incomment = (!NILP (tem)
3006                          ? (INTEGERP (tem) ? XINT (tem) : -1)
3007                          : 0);
3008
3009       oldstate = Fcdr (oldstate);
3010       tem = Fcar (oldstate);
3011       start_quoted = !NILP (tem);
3012
3013       /* if the eighth element of the list is nil, we are in comment
3014          style a.  If it is non-nil, we are in comment style b */
3015       oldstate = Fcdr (oldstate);
3016       oldstate = Fcdr (oldstate);
3017       tem = Fcar (oldstate);
3018       state.comstyle = (NILP (tem)
3019                         ? 0
3020                         : (EQ (tem, Qsyntax_table)
3021                            ? ST_COMMENT_STYLE
3022                            : INTEGERP (tem) ? XINT (tem) : 1));
3023
3024       oldstate = Fcdr (oldstate);
3025       tem = Fcar (oldstate);
3026       state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
3027       oldstate = Fcdr (oldstate);
3028       tem = Fcar (oldstate);
3029       while (!NILP (tem))               /* >= second enclosing sexps.  */
3030         {
3031           /* curlevel++->last ran into compiler bug on Apollo */
3032           curlevel->last = XINT (Fcar (tem));
3033           if (++curlevel == endlevel)
3034             curlevel--; /* error ("Nesting too deep for parser"); */
3035           curlevel->prev = -1;
3036           curlevel->last = -1;
3037           tem = Fcdr (tem);
3038         }
3039     }
3040   state.quoted = 0;
3041   mindepth = depth;
3042
3043   curlevel->prev = -1;
3044   curlevel->last = -1;
3045
3046   SETUP_SYNTAX_TABLE (prev_from, 1);
3047   temp = FETCH_CHAR (prev_from_byte);
3048   prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3049   UPDATE_SYNTAX_TABLE_FORWARD (from);
3050
3051   /* Enter the loop at a place appropriate for initial state.  */
3052
3053   if (state.incomment)
3054     goto startincomment;
3055   if (state.instring >= 0)
3056     {
3057       nofence = state.instring != ST_STRING_STYLE;
3058       if (start_quoted)
3059         goto startquotedinstring;
3060       goto startinstring;
3061     }
3062   else if (start_quoted)
3063     goto startquoted;
3064
3065   while (from < end)
3066     {
3067       int syntax;
3068       INC_FROM;
3069       code = prev_from_syntax & 0xff;
3070
3071       if (from < end
3072           && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3073           && (c1 = FETCH_CHAR (from_byte),
3074               syntax = SYNTAX_WITH_FLAGS (c1),
3075               SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3076         /* Duplicate code to avoid a complex if-expression
3077            which causes trouble for the SGI compiler.  */
3078         {
3079           /* Record the comment style we have entered so that only
3080              the comment-end sequence of the same style actually
3081              terminates the comment section.  */
3082           state.comstyle
3083             = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3084           comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3085           comnested = comnested || SYNTAX_FLAGS_COMMENT_NESTED (syntax);
3086           state.incomment = comnested ? 1 : -1;
3087           state.comstr_start = prev_from;
3088           INC_FROM;
3089           code = Scomment;
3090         }
3091       else if (code == Scomment_fence)
3092         {
3093           /* Record the comment style we have entered so that only
3094              the comment-end sequence of the same style actually
3095              terminates the comment section.  */
3096           state.comstyle = ST_COMMENT_STYLE;
3097           state.incomment = -1;
3098           state.comstr_start = prev_from;
3099           code = Scomment;
3100         }
3101       else if (code == Scomment)
3102         {
3103           state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3104           state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3105                              1 : -1);
3106           state.comstr_start = prev_from;
3107         }
3108
3109       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3110         continue;
3111       switch (SWITCH_ENUM_CAST (code))
3112         {
3113         case Sescape:
3114         case Scharquote:
3115           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3116           curlevel->last = prev_from;
3117         startquoted:
3118           if (from == end) goto endquoted;
3119           INC_FROM;
3120           goto symstarted;
3121           /* treat following character as a word constituent */
3122         case Sword:
3123         case Ssymbol:
3124           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3125           curlevel->last = prev_from;
3126         symstarted:
3127           while (from < end)
3128             {
3129               /* Some compilers can't handle this inside the switch.  */
3130               temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3131               temp = SYNTAX (temp);
3132               switch (temp)
3133                 {
3134                 case Scharquote:
3135                 case Sescape:
3136                   INC_FROM;
3137                   if (from == end) goto endquoted;
3138                   break;
3139                 case Sword:
3140                 case Ssymbol:
3141                 case Squote:
3142                   break;
3143                 default:
3144                   goto symdone;
3145                 }
3146               INC_FROM;
3147             }
3148         symdone:
3149           curlevel->prev = curlevel->last;
3150           break;
3151
3152         case Scomment_fence: /* Can't happen because it's handled above.  */
3153         case Scomment:
3154           if (commentstop || boundary_stop) goto done;
3155         startincomment:
3156           /* The (from == BEGV) test was to enter the loop in the middle so
3157              that we find a 2-char comment ender even if we start in the
3158              middle of it.  We don't want to do that if we're just at the
3159              beginning of the comment (think of (*) ... (*)).  */
3160           found = forw_comment (from, from_byte, end,
3161                                 state.incomment, state.comstyle,
3162                                 (from == BEGV || from < state.comstr_start + 3)
3163                                 ? 0 : prev_from_syntax,
3164                                 &out_charpos, &out_bytepos, &state.incomment);
3165           from = out_charpos; from_byte = out_bytepos;
3166           /* Beware!  prev_from and friends are invalid now.
3167              Luckily, the `done' doesn't use them and the INC_FROM
3168              sets them to a sane value without looking at them. */
3169           if (!found) goto done;
3170           INC_FROM;
3171           state.incomment = 0;
3172           state.comstyle = 0;   /* reset the comment style */
3173           if (boundary_stop) goto done;
3174           break;
3175
3176         case Sopen:
3177           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3178           depth++;
3179           /* curlevel++->last ran into compiler bug on Apollo */
3180           curlevel->last = prev_from;
3181           if (++curlevel == endlevel)
3182             curlevel--; /* error ("Nesting too deep for parser"); */
3183           curlevel->prev = -1;
3184           curlevel->last = -1;
3185           if (targetdepth == depth) goto done;
3186           break;
3187
3188         case Sclose:
3189           depth--;
3190           if (depth < mindepth)
3191             mindepth = depth;
3192           if (curlevel != levelstart)
3193             curlevel--;
3194           curlevel->prev = curlevel->last;
3195           if (targetdepth == depth) goto done;
3196           break;
3197
3198         case Sstring:
3199         case Sstring_fence:
3200           state.comstr_start = from - 1;
3201           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3202           curlevel->last = prev_from;
3203           state.instring = (code == Sstring
3204                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3205                             : ST_STRING_STYLE);
3206           if (boundary_stop) goto done;
3207         startinstring:
3208           {
3209             nofence = state.instring != ST_STRING_STYLE;
3210
3211             while (1)
3212               {
3213                 int c;
3214
3215                 if (from >= end) goto done;
3216                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3217                 /* Some compilers can't handle this inside the switch.  */
3218                 temp = SYNTAX (c);
3219
3220                 /* Check TEMP here so that if the char has
3221                    a syntax-table property which says it is NOT
3222                    a string character, it does not end the string.  */
3223                 if (nofence && c == state.instring && temp == Sstring)
3224                   break;
3225
3226                 switch (temp)
3227                   {
3228                   case Sstring_fence:
3229                     if (!nofence) goto string_end;
3230                     break;
3231                   case Scharquote:
3232                   case Sescape:
3233                     INC_FROM;
3234                   startquotedinstring:
3235                     if (from >= end) goto endquoted;
3236                   }
3237                 INC_FROM;
3238               }
3239           }
3240         string_end:
3241           state.instring = -1;
3242           curlevel->prev = curlevel->last;
3243           INC_FROM;
3244           if (boundary_stop) goto done;
3245           break;
3246
3247         case Smath:
3248           /* FIXME: We should do something with it.  */
3249           break;
3250         default:
3251           /* Ignore whitespace, punctuation, quote, endcomment.  */
3252           break;
3253         }
3254     }
3255   goto done;
3256
3257  stop:   /* Here if stopping before start of sexp. */
3258   from = prev_from;    /* We have just fetched the char that starts it; */
3259   goto done; /* but return the position before it. */
3260
3261  endquoted:
3262   state.quoted = 1;
3263  done:
3264   state.depth = depth;
3265   state.mindepth = mindepth;
3266   state.thislevelstart = curlevel->prev;
3267   state.prevlevelstart
3268     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3269   state.location = from;
3270   state.levelstarts = Qnil;
3271   while (curlevel > levelstart)
3272     state.levelstarts = Fcons (make_number ((--curlevel)->last),
3273                                state.levelstarts);
3274   immediate_quit = 0;
3275
3276   *stateptr = state;
3277 }
3278
3279 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3280        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3281 Parsing stops at TO or when certain criteria are met;
3282  point is set to where parsing stops.
3283 If fifth arg OLDSTATE is omitted or nil,
3284  parsing assumes that FROM is the beginning of a function.
3285 Value is a list of elements describing final state of parsing:
3286  0. depth in parens.
3287  1. character address of start of innermost containing list; nil if none.
3288  2. character address of start of last complete sexp terminated.
3289  3. non-nil if inside a string.
3290     (it is the character that will terminate the string,
3291      or t if the string should be terminated by a generic string delimiter.)
3292  4. nil if outside a comment, t if inside a non-nestable comment,
3293     else an integer (the current comment nesting).
3294  5. t if following a quote character.
3295  6. the minimum paren-depth encountered during this scan.
3296  7. style of comment, if any.
3297  8. character address of start of comment or string; nil if not in one.
3298  9. Intermediate data for continuation of parsing (subject to change).
3299 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3300 in parentheses becomes equal to TARGETDEPTH.
3301 Fourth arg STOPBEFORE non-nil means stop when come to
3302  any character that starts a sexp.
3303 Fifth arg OLDSTATE is a list like what this function returns.
3304  It is used to initialize the state of the parse.  Elements number 1, 2, 6
3305  and 8 are ignored.
3306 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3307  If it is symbol `syntax-table', stop after the start of a comment or a
3308  string, or after end of a comment or a string.  */)
3309   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth, Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3310 {
3311   struct lisp_parse_state state;
3312   int target;
3313
3314   if (!NILP (targetdepth))
3315     {
3316       CHECK_NUMBER (targetdepth);
3317       target = XINT (targetdepth);
3318     }
3319   else
3320     target = -100000;           /* We won't reach this depth */
3321
3322   validate_region (&from, &to);
3323   scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3324                       XINT (to),
3325                       target, !NILP (stopbefore), oldstate,
3326                       (NILP (commentstop)
3327                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3328
3329   SET_PT (state.location);
3330
3331   return Fcons (make_number (state.depth),
3332            Fcons (state.prevlevelstart < 0
3333                   ? Qnil : make_number (state.prevlevelstart),
3334              Fcons (state.thislevelstart < 0
3335                     ? Qnil : make_number (state.thislevelstart),
3336                Fcons (state.instring >= 0
3337                       ? (state.instring == ST_STRING_STYLE
3338                          ? Qt : make_number (state.instring)) : Qnil,
3339                  Fcons (state.incomment < 0 ? Qt :
3340                         (state.incomment == 0 ? Qnil :
3341                          make_number (state.incomment)),
3342                    Fcons (state.quoted ? Qt : Qnil,
3343                      Fcons (make_number (state.mindepth),
3344                        Fcons ((state.comstyle
3345                                ? (state.comstyle == ST_COMMENT_STYLE
3346                                   ? Qsyntax_table
3347                                   : make_number (state.comstyle))
3348                                : Qnil),
3349                               Fcons (((state.incomment
3350                                        || (state.instring >= 0))
3351                                       ? make_number (state.comstr_start)
3352                                       : Qnil),
3353                                      Fcons (state.levelstarts, Qnil))))))))));
3354 }
3355 \f
3356 void
3357 init_syntax_once (void)
3358 {
3359   register int i, c;
3360   Lisp_Object temp;
3361
3362   /* This has to be done here, before we call Fmake_char_table.  */
3363   Qsyntax_table = intern_c_string ("syntax-table");
3364   staticpro (&Qsyntax_table);
3365
3366   /* Intern_C_String this now in case it isn't already done.
3367      Setting this variable twice is harmless.
3368      But don't staticpro it here--that is done in alloc.c.  */
3369   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3370
3371   /* Create objects which can be shared among syntax tables.  */
3372   Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3373   for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3374     XVECTOR (Vsyntax_code_object)->contents[i]
3375       = Fcons (make_number (i), Qnil);
3376
3377   /* Now we are ready to set up this property, so we can
3378      create syntax tables.  */
3379   Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3380
3381   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3382
3383   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3384
3385   /* Control characters should not be whitespace.  */
3386   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3387   for (i = 0; i <= ' ' - 1; i++)
3388     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3389   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3390
3391   /* Except that a few really are whitespace.  */
3392   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3393   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3394   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3395   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3396   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3397   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3398
3399   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3400   for (i = 'a'; i <= 'z'; i++)
3401     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3402   for (i = 'A'; i <= 'Z'; i++)
3403     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3404   for (i = '0'; i <= '9'; i++)
3405     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3406
3407   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3408   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3409
3410   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3411                         Fcons (make_number (Sopen), make_number (')')));
3412   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3413                         Fcons (make_number (Sclose), make_number ('(')));
3414   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3415                         Fcons (make_number (Sopen), make_number (']')));
3416   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3417                         Fcons (make_number (Sclose), make_number ('[')));
3418   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3419                         Fcons (make_number (Sopen), make_number ('}')));
3420   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3421                         Fcons (make_number (Sclose), make_number ('{')));
3422   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3423                         Fcons (make_number ((int) Sstring), Qnil));
3424   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3425                         Fcons (make_number ((int) Sescape), Qnil));
3426
3427   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3428   for (i = 0; i < 10; i++)
3429     {
3430       c = "_-+*/&|<>="[i];
3431       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3432     }
3433
3434   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3435   for (i = 0; i < 12; i++)
3436     {
3437       c = ".,;:?!#@~^'`"[i];
3438       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3439     }
3440
3441   /* All multibyte characters have syntax `word' by default.  */
3442   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3443   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3444 }
3445
3446 void
3447 syms_of_syntax (void)
3448 {
3449   Qsyntax_table_p = intern_c_string ("syntax-table-p");
3450   staticpro (&Qsyntax_table_p);
3451
3452   staticpro (&Vsyntax_code_object);
3453
3454   staticpro (&gl_state.object);
3455   staticpro (&gl_state.global_code);
3456   staticpro (&gl_state.current_syntax_table);
3457   staticpro (&gl_state.old_prop);
3458
3459   /* Defined in regex.c */
3460   staticpro (&re_match_object);
3461
3462   Qscan_error = intern_c_string ("scan-error");
3463   staticpro (&Qscan_error);
3464   Fput (Qscan_error, Qerror_conditions,
3465         pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3466   Fput (Qscan_error, Qerror_message,
3467         make_pure_c_string ("Scan error"));
3468
3469   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3470                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3471
3472   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3473                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3474 Otherwise, that text property is simply ignored.
3475 See the info node `(elisp)Syntax Properties' for a description of the
3476 `syntax-table' property.  */);
3477
3478   words_include_escapes = 0;
3479   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3480                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3481
3482   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3483                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3484   multibyte_syntax_as_symbol = 0;
3485
3486   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3487                open_paren_in_column_0_is_defun_start,
3488                doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3489   open_paren_in_column_0_is_defun_start = 1;
3490
3491
3492   DEFVAR_LISP ("find-word-boundary-function-table",
3493                Vfind_word_boundary_function_table,
3494                doc: /*
3495 Char table of functions to search for the word boundary.
3496 Each function is called with two arguments; POS and LIMIT.
3497 POS and LIMIT are character positions in the current buffer.
3498
3499 If POS is less than LIMIT, POS is at the first character of a word,
3500 and the return value of a function is a position after the last
3501 character of that word.
3502
3503 If POS is not less than LIMIT, POS is at the last character of a word,
3504 and the return value of a function is a position at the first
3505 character of that word.
3506
3507 In both cases, LIMIT bounds the search. */);
3508   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3509
3510   defsubr (&Ssyntax_table_p);
3511   defsubr (&Ssyntax_table);
3512   defsubr (&Sstandard_syntax_table);
3513   defsubr (&Scopy_syntax_table);
3514   defsubr (&Sset_syntax_table);
3515   defsubr (&Schar_syntax);
3516   defsubr (&Smatching_paren);
3517   defsubr (&Sstring_to_syntax);
3518   defsubr (&Smodify_syntax_entry);
3519   defsubr (&Sinternal_describe_syntax_value);
3520
3521   defsubr (&Sforward_word);
3522
3523   defsubr (&Sskip_chars_forward);
3524   defsubr (&Sskip_chars_backward);
3525   defsubr (&Sskip_syntax_forward);
3526   defsubr (&Sskip_syntax_backward);
3527
3528   defsubr (&Sforward_comment);
3529   defsubr (&Sscan_lists);
3530   defsubr (&Sscan_sexps);
3531   defsubr (&Sbackward_prefix_chars);
3532   defsubr (&Sparse_partial_sexp);
3533 }