X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/cb6792d2dbc65e1fcf63a45e82b3d8ac35e8313f..429e7e2d69dd0fab7389dc4f7fc6e95d3e4bd612:/src/search.c diff --git a/src/search.c b/src/search.c index 225155d73a..c60d68b937 100644 --- a/src/search.c +++ b/src/search.c @@ -1,5 +1,5 @@ /* String search routines for GNU Emacs. - Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc. + Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -20,9 +20,6 @@ Boston, MA 02111-1307, USA. */ #include -#ifdef STDC_HEADERS -#include -#endif #include "lisp.h" #include "syntax.h" #include "category.h" @@ -36,9 +33,6 @@ Boston, MA 02111-1307, USA. */ #include #include "regex.h" -#define min(a, b) ((a) < (b) ? (a) : (b)) -#define max(a, b) ((a) > (b) ? (a) : (b)) - #define REGEXP_CACHE_SIZE 20 /* If the regexp is non-nil, then the buffer contains the compiled form @@ -100,12 +94,6 @@ matcher_overflow () error ("Stack overflow in regexp matcher"); } -#ifdef __STDC__ -#define CONST const -#else -#define CONST -#endif - /* Compile a regexp and signal a Lisp error if anything goes wrong. PATTERN is the pattern to compile. CP is the place to put the result. @@ -140,16 +128,16 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) if (multibyte == STRING_MULTIBYTE (pattern)) { - raw_pattern = (unsigned char *) XSTRING (pattern)->data; - raw_pattern_size = STRING_BYTES (XSTRING (pattern)); + raw_pattern = (unsigned char *) SDATA (pattern); + raw_pattern_size = SBYTES (pattern); } else if (multibyte) { - raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data, - XSTRING (pattern)->size); + raw_pattern_size = count_size_as_multibyte (SDATA (pattern), + SCHARS (pattern)); raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); - copy_text (XSTRING (pattern)->data, raw_pattern, - XSTRING (pattern)->size, 0, 1); + copy_text (SDATA (pattern), raw_pattern, + SCHARS (pattern), 0, 1); } else { @@ -159,10 +147,10 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) by subtracting nonascii-insert-offset from each non-ASCII char, so that only the multibyte chars which really correspond to the chosen single-byte character set can possibly match. */ - raw_pattern_size = XSTRING (pattern)->size; + raw_pattern_size = SCHARS (pattern); raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); - copy_text (XSTRING (pattern)->data, raw_pattern, - STRING_BYTES (XSTRING (pattern)), 1, 0); + copy_text (SDATA (pattern), raw_pattern, + SBYTES (pattern), 1, 0); } cp->regexp = Qnil; @@ -182,6 +170,23 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) cp->regexp = Fcopy_sequence (pattern); } +/* Shrink each compiled regexp buffer in the cache + to the size actually used right now. + This is called from garbage collection. */ + +void +shrink_regexp_cache () +{ + struct regexp_cache *cp; + + for (cp = searchbuf_head; cp != 0; cp = cp->next) + { + cp->buf.allocated = cp->buf.used; + cp->buf.buffer + = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used); + } +} + /* Compile a regexp if necessary, but first check to see if there's one in the cache. PATTERN is the pattern to compile. @@ -205,16 +210,27 @@ compile_pattern (pattern, regp, translate, posix, multibyte) for (cpp = &searchbuf_head; ; cpp = &cp->next) { cp = *cpp; - if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size + /* Entries are initialized to nil, and may be set to nil by + compile_pattern_1 if the pattern isn't valid. Don't apply + string accessors in those cases. However, compile_pattern_1 + is only applied to the cache entry we pick here to reuse. So + nil should never appear before a non-nil entry. */ + if (NILP (cp->regexp)) + goto compile_it; + if (SCHARS (cp->regexp) == SCHARS (pattern) + && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) && !NILP (Fstring_equal (cp->regexp, pattern)) && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) && cp->posix == posix && cp->buf.multibyte == multibyte) break; - /* If we're at the end of the cache, compile into the last cell. */ + /* If we're at the end of the cache, compile into the nil cell + we found, or the last (least recently used) cell with a + string value. */ if (cp->next == 0) { + compile_it: compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte); break; } @@ -260,7 +276,7 @@ looking_at_1 (string, posix) if (running_asynch_code) save_search_regs (); - CHECK_STRING (string, 0); + CHECK_STRING (string); bufp = compile_pattern (string, &search_regs, (!NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : Qnil), @@ -290,10 +306,12 @@ looking_at_1 (string, posix) } re_match_object = Qnil; - + i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2, PT_BYTE - BEGV_BYTE, &search_regs, ZV_BYTE - BEGV_BYTE); + immediate_quit = 0; + if (i == -2) matcher_overflow (); @@ -308,28 +326,27 @@ looking_at_1 (string, posix) = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); } XSETBUFFER (last_thing_searched, current_buffer); - immediate_quit = 0; return val; } DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0, - "Return t if text after point matches regular expression REGEXP.\n\ -This function modifies the match data that `match-beginning',\n\ -`match-end' and `match-data' access; save and restore the match\n\ -data if you want to preserve them.") - (regexp) + doc: /* Return t if text after point matches regular expression REGEXP. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them. */) + (regexp) Lisp_Object regexp; { return looking_at_1 (regexp, 0); } DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0, - "Return t if text after point matches regular expression REGEXP.\n\ -Find the longest match, in accord with Posix regular expression rules.\n\ -This function modifies the match data that `match-beginning',\n\ -`match-end' and `match-data' access; save and restore the match\n\ -data if you want to preserve them.") - (regexp) + doc: /* Return t if text after point matches regular expression REGEXP. +Find the longest match, in accord with Posix regular expression rules. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them. */) + (regexp) Lisp_Object regexp; { return looking_at_1 (regexp, 1); @@ -348,16 +365,16 @@ string_match_1 (regexp, string, start, posix) if (running_asynch_code) save_search_regs (); - CHECK_STRING (regexp, 0); - CHECK_STRING (string, 1); + CHECK_STRING (regexp); + CHECK_STRING (string); if (NILP (start)) pos = 0, pos_byte = 0; else { - int len = XSTRING (string)->size; + int len = SCHARS (string); - CHECK_NUMBER (start, 2); + CHECK_NUMBER (start); pos = XINT (start); if (pos < 0 && -pos <= len) pos = len + pos; @@ -373,10 +390,10 @@ string_match_1 (regexp, string, start, posix) STRING_MULTIBYTE (string)); immediate_quit = 1; re_match_object = string; - - val = re_search (bufp, (char *) XSTRING (string)->data, - STRING_BYTES (XSTRING (string)), pos_byte, - STRING_BYTES (XSTRING (string)) - pos_byte, + + val = re_search (bufp, (char *) SDATA (string), + SBYTES (string), pos_byte, + SBYTES (string) - pos_byte, &search_regs); immediate_quit = 0; last_thing_searched = Qt; @@ -397,25 +414,30 @@ string_match_1 (regexp, string, start, posix) } DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0, - "Return index of start of first match for REGEXP in STRING, or nil.\n\ -If third arg START is non-nil, start search at that index in STRING.\n\ -For index of first char beyond the match, do (match-end 0).\n\ -`match-end' and `match-beginning' also give indices of substrings\n\ -matched by parenthesis constructs in the pattern.") - (regexp, string, start) + doc: /* Return index of start of first match for REGEXP in STRING, or nil. +Case is ignored if `case-fold-search' is non-nil in the current buffer. +If third arg START is non-nil, start search at that index in STRING. +For index of first char beyond the match, do (match-end 0). +`match-end' and `match-beginning' also give indices of substrings +matched by parenthesis constructs in the pattern. + +You can use the function `match-string' to extract the substrings +matched by the parenthesis constructions in REGEXP. */) + (regexp, string, start) Lisp_Object regexp, string, start; { return string_match_1 (regexp, string, start, 0); } DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0, - "Return index of start of first match for REGEXP in STRING, or nil.\n\ -Find the longest match, in accord with Posix regular expression rules.\n\ -If third arg START is non-nil, start search at that index in STRING.\n\ -For index of first char beyond the match, do (match-end 0).\n\ -`match-end' and `match-beginning' also give indices of substrings\n\ -matched by parenthesis constructs in the pattern.") - (regexp, string, start) + doc: /* Return index of start of first match for REGEXP in STRING, or nil. +Find the longest match, in accord with Posix regular expression rules. +Case is ignored if `case-fold-search' is non-nil in the current buffer. +If third arg START is non-nil, start search at that index in STRING. +For index of first char beyond the match, do (match-end 0). +`match-end' and `match-beginning' also give indices of substrings +matched by parenthesis constructs in the pattern. */) + (regexp, string, start) Lisp_Object regexp, string, start; { return string_match_1 (regexp, string, start, 1); @@ -436,10 +458,10 @@ fast_string_match (regexp, string) 0, STRING_MULTIBYTE (string)); immediate_quit = 1; re_match_object = string; - - val = re_search (bufp, (char *) XSTRING (string)->data, - STRING_BYTES (XSTRING (string)), 0, - STRING_BYTES (XSTRING (string)), 0); + + val = re_search (bufp, (char *) SDATA (string), + SBYTES (string), 0, + SBYTES (string), 0); immediate_quit = 0; return val; } @@ -454,7 +476,7 @@ extern Lisp_Object Vascii_downcase_table; int fast_c_string_match_ignore_case (regexp, string) Lisp_Object regexp; - char *string; + const char *string; { int val; struct re_pattern_buffer *bufp; @@ -528,7 +550,7 @@ scan_buffer (target, start, end, count, shortage, allow_quit) int allow_quit; { struct region_cache *newline_cache; - int direction; + int direction; if (count > 0) { @@ -589,7 +611,7 @@ scan_buffer (target, start, end, count, shortage, allow_quit) ceiling_byte = min (tem, ceiling_byte); { - /* The termination address of the dumb loop. */ + /* The termination address of the dumb loop. */ register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte) + 1; register unsigned char *cursor @@ -733,11 +755,6 @@ scan_newline (start, start_byte, limit, limit_byte, count, allow_quit) int old_immediate_quit = immediate_quit; - /* If we are not in selective display mode, - check only for newlines. */ - int selective_display = (!NILP (current_buffer->selective_display) - && !INTEGERP (current_buffer->selective_display)); - /* The code that follows is like scan_buffer but checks for either newline or carriage return. */ @@ -839,7 +856,7 @@ find_before_next_newline (from, to, cnt) if (shortage == 0) pos--; - + return pos; } @@ -858,11 +875,11 @@ search_command (string, bound, noerror, count, direction, RE, posix) if (!NILP (count)) { - CHECK_NUMBER (count, 3); + CHECK_NUMBER (count); n *= XINT (count); } - CHECK_STRING (string, 0); + CHECK_STRING (string); if (NILP (bound)) { if (n > 0) @@ -872,7 +889,7 @@ search_command (string, bound, noerror, count, direction, RE, posix) } else { - CHECK_NUMBER_COERCE_MARKER (bound, 1); + CHECK_NUMBER_COERCE_MARKER (bound); lim = XINT (bound); if (n > 0 ? lim < PT : lim > PT) error ("Invalid search bound (wrong side of point)"); @@ -925,9 +942,8 @@ static int trivial_regexp_p (regexp) Lisp_Object regexp; { - int len = STRING_BYTES (XSTRING (regexp)); - unsigned char *s = XSTRING (regexp)->data; - unsigned char c; + int len = SBYTES (regexp); + unsigned char *s = SDATA (regexp); while (--len >= 0) { switch (*s++) @@ -941,7 +957,7 @@ trivial_regexp_p (regexp) { case '|': case '(': case ')': case '`': case '\'': case 'b': case 'B': case '<': case '>': case 'w': case 'W': case 's': - case 'S': case '=': + case 'S': case '=': case '{': case '}': case 'c': case 'C': /* for categoryspec and notcategoryspec */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -998,24 +1014,21 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, Lisp_Object inverse_trt; int posix; { - int len = XSTRING (string)->size; - int len_byte = STRING_BYTES (XSTRING (string)); + int len = SCHARS (string); + int len_byte = SBYTES (string); register int i; if (running_asynch_code) save_search_regs (); + /* Searching 0 times means don't move. */ /* Null string is found at starting position. */ - if (len == 0) + if (len == 0 || n == 0) { - set_search_regs (pos, 0); + set_search_regs (pos_byte, 0); return pos; } - /* Searching 0 times means don't move. */ - if (n == 0) - return pos; - if (RE && !trivial_regexp_p (string)) { unsigned char *p1, *p2; @@ -1049,7 +1062,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, s2 = 0; } re_match_object = Qnil; - + while (n < 0) { int val; @@ -1126,9 +1139,9 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, int raw_pattern_size_byte; unsigned char *patbuf; int multibyte = !NILP (current_buffer->enable_multibyte_characters); - unsigned char *base_pat = XSTRING (string)->data; + unsigned char *base_pat = SDATA (string); int charset_base = -1; - int simple = 1; + int boyer_moore_ok = 1; /* MULTIBYTE says whether the text to be searched is multibyte. We must convert PATTERN to match that, or we will not really @@ -1136,19 +1149,19 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, if (multibyte == STRING_MULTIBYTE (string)) { - raw_pattern = (unsigned char *) XSTRING (string)->data; - raw_pattern_size = XSTRING (string)->size; - raw_pattern_size_byte = STRING_BYTES (XSTRING (string)); + raw_pattern = (unsigned char *) SDATA (string); + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte = SBYTES (string); } else if (multibyte) { - raw_pattern_size = XSTRING (string)->size; + raw_pattern_size = SCHARS (string); raw_pattern_size_byte - = count_size_as_multibyte (XSTRING (string)->data, + = count_size_as_multibyte (SDATA (string), raw_pattern_size); raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1); - copy_text (XSTRING (string)->data, raw_pattern, - XSTRING (string)->size, 0, 1); + copy_text (SDATA (string), raw_pattern, + SCHARS (string), 0, 1); } else { @@ -1158,11 +1171,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, by subtracting nonascii-insert-offset from each non-ASCII char, so that only the multibyte chars which really correspond to the chosen single-byte character set can possibly match. */ - raw_pattern_size = XSTRING (string)->size; - raw_pattern_size_byte = XSTRING (string)->size; + raw_pattern_size = SCHARS (string); + raw_pattern_size_byte = SCHARS (string); raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); - copy_text (XSTRING (string)->data, raw_pattern, - STRING_BYTES (XSTRING (string)), 1, 0); + copy_text (SDATA (string), raw_pattern, + SBYTES (string), 1, 0); } /* Copy and optionally translate the pattern. */ @@ -1175,7 +1188,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, { while (--len >= 0) { - unsigned char workbuf[4], *str; + unsigned char str[MAX_MULTIBYTE_LENGTH]; int c, translated, inverse; int in_charlen, charlen; @@ -1190,17 +1203,26 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); + /* Translate the character, if requested. */ TRANSLATE (translated, trt, c); /* If translation changed the byte-length, go back to the original character. */ - charlen = CHAR_STRING (translated, workbuf, str); + charlen = CHAR_STRING (translated, str); if (in_charlen != charlen) { translated = c; - charlen = CHAR_STRING (c, workbuf, str); + charlen = CHAR_STRING (c, str); } + /* If we are searching for something strange, + an invalid multibyte code, don't use boyer-moore. */ + if (! ASCII_BYTE_P (translated) + && (charlen == 1 /* 8bit code */ + || charlen != in_charlen /* invalid multibyte code */ + )) + boyer_moore_ok = 0; + TRANSLATE (inverse, inverse_trt, c); /* Did this char actually get translated? @@ -1209,15 +1231,17 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, { /* Keep track of which character set row contains the characters that need translation. */ - int charset_base_code = c & ~0xff; - if (charset_base == -1) + int charset_base_code = c & ~CHAR_FIELD3_MASK; + int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK; + + if (charset_base_code != inverse_charset_base) + boyer_moore_ok = 0; + else if (charset_base == -1) charset_base = charset_base_code; else if (charset_base != charset_base_code) /* If two different rows appear, needing translation, then we cannot use boyer_moore search. */ - simple = 0; - /* ??? Handa: this must do simple = 0 - if c is a composite character. */ + boyer_moore_ok = 0; } /* Store this character into the translated pattern. */ @@ -1229,9 +1253,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } else { + /* Unibyte buffer. */ + charset_base = 0; while (--len >= 0) { - int c, translated, inverse; + int c, translated; /* If we got here and the RE flag is set, it's because we're dealing with a regexp known to be trivial, so the backslash @@ -1243,22 +1269,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } c = *base_pat++; TRANSLATE (translated, trt, c); - TRANSLATE (inverse, inverse_trt, c); - - /* Did this char actually get translated? - Would any other char get translated into it? */ - if (translated != c || inverse != c) - { - /* Keep track of which character set row - contains the characters that need translation. */ - int charset_base_code = c & ~0xff; - if (charset_base == -1) - charset_base = charset_base_code; - else if (charset_base != charset_base_code) - /* If two different rows appear, needing translation, - then we cannot use boyer_moore search. */ - simple = 0; - } *pat++ = translated; } } @@ -1267,7 +1277,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, len = raw_pattern_size; pat = base_pat = patbuf; - if (simple) + if (boyer_moore_ok) return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, pos, pos_byte, lim, lim_byte, charset_base); @@ -1515,17 +1525,17 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, { int direction = ((n > 0) ? 1 : -1); register int dirlen; - int infinity, limit, k, stride_for_teases; + int infinity, limit, stride_for_teases = 0; register int *BM_tab; int *BM_tab_base; - register unsigned char *cursor, *p_limit; + register unsigned char *cursor, *p_limit; register int i, j; unsigned char *pat, *pat_end; int multibyte = ! NILP (current_buffer->enable_multibyte_characters); unsigned char simple_translate[0400]; - int translate_prev_byte; - int translate_anteprev_byte; + int translate_prev_byte = 0; + int translate_anteprev_byte = 0; #ifdef C_ALLOCA int BM_tab_space[0400]; @@ -1551,14 +1561,14 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, /* a single test, a test for having gone past the end of the */ /* permissible match region, to test for both possible matches (when */ /* the stride goes past the end immediately) and failure to */ - /* match (where you get nudged past the end one stride at a time). */ + /* match (where you get nudged past the end one stride at a time). */ /* Here we make a "mickey mouse" BM table. The stride of the search */ /* is determined only by the last character of the putative match. */ /* If that character does not match, we will stride the proper */ /* distance to propose a match that superimposes it on the last */ /* instance of a character that matches it (per trt), or misses */ - /* it entirely if there is none. */ + /* it entirely if there is none. */ dirlen = len_byte * direction; infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction; @@ -1612,7 +1622,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, while (! CHAR_HEAD_P (*charstart)) charstart--; untranslated = STRING_CHAR (charstart, ptr - charstart + 1); - if (charset_base == (untranslated & ~0xff)) + if (charset_base == (untranslated & ~CHAR_FIELD3_MASK)) { TRANSLATE (ch, trt, untranslated); if (! CHAR_HEAD_P (*ptr)) @@ -1646,7 +1656,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, BM_tab[j] = dirlen - i; /* A translation table is accompanied by its inverse -- see */ - /* comment following downcase_table for details */ + /* comment following downcase_table for details */ if (this_translated) { int starting_ch = ch; @@ -1834,7 +1844,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, /* This loop can be coded for space rather than */ /* speed because it will usually run only once. */ /* (the reach is at most len + 21, and typically */ - /* does not exceed len) */ + /* does not exceed len) */ while ((limit - pos_byte) * direction >= 0) pos_byte += BM_tab[FETCH_BYTE (pos_byte)]; /* now run the same tests to distinguish going off the */ @@ -1896,12 +1906,15 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, } /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES - for a match just found in the current buffer. */ + for the overall match just found in the current buffer. + Also clear out the match data for registers 1 and up. */ static void set_search_regs (beg_byte, nbytes) int beg_byte, nbytes; { + int i; + /* Make sure we have registers in which to store the match position. */ if (search_regs.num_regs == 0) @@ -1911,6 +1924,13 @@ set_search_regs (beg_byte, nbytes) search_regs.num_regs = 2; } + /* Clear out the other registers. */ + for (i = 1; i < search_regs.num_regs; i++) + { + search_regs.start[i] = -1; + search_regs.end[i] = -1; + } + search_regs.start[0] = BYTE_TO_CHAR (beg_byte); search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes); XSETBUFFER (last_thing_searched, current_buffer); @@ -1930,18 +1950,15 @@ wordify (string) int prev_c = 0; int adjust; - CHECK_STRING (string, 0); - p = XSTRING (string)->data; - len = XSTRING (string)->size; + CHECK_STRING (string); + p = SDATA (string); + len = SCHARS (string); for (i = 0, i_byte = 0; i < len; ) { int c; - - if (STRING_MULTIBYTE (string)) - FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); - else - c = XSTRING (string)->data[i++]; + + FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); if (SYNTAX (c) != Sword) { @@ -1956,13 +1973,17 @@ wordify (string) if (SYNTAX (prev_c) == Sword) word_count++; if (!word_count) - return build_string (""); + return empty_string; adjust = - punct_count + 5 * (word_count - 1) + 4; - val = make_uninit_multibyte_string (len + adjust, - STRING_BYTES (XSTRING (string)) + adjust); + if (STRING_MULTIBYTE (string)) + val = make_uninit_multibyte_string (len + adjust, + SBYTES (string) + + adjust); + else + val = make_uninit_string (len + adjust); - o = XSTRING (val)->data; + o = SDATA (val); *o++ = '\\'; *o++ = 'b'; prev_c = 0; @@ -1971,15 +1992,12 @@ wordify (string) { int c; int i_byte_orig = i_byte; - - if (STRING_MULTIBYTE (string)) - FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); - else - c = XSTRING (string)->data[i++]; + + FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); if (SYNTAX (c) == Sword) { - bcopy (&XSTRING (string)->data[i_byte_orig], o, + bcopy (SDATA (string) + i_byte_orig, o, i_byte - i_byte_orig); o += i_byte - i_byte_orig; } @@ -2002,181 +2020,202 @@ wordify (string) } DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4, - "MSearch backward: ", - "Search backward from point for STRING.\n\ -Set point to the beginning of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend before that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, position at limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (string, bound, noerror, count) + "MSearch backward: ", + doc: /* Search backward from point for STRING. +Set point to the beginning of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend before that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, position at limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. + +Search case-sensitivity is determined by the value of the variable +`case-fold-search', which see. + +See also the functions `match-beginning', `match-end' and `replace-match'. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (string, bound, noerror, count, -1, 0, 0); } DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ", - "Search forward from point for STRING.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position. nil is equivalent\n\ - to (point-max).\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (string, bound, noerror, count) + doc: /* Search forward from point for STRING. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. nil is equivalent + to (point-max). +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. + +Search case-sensitivity is determined by the value of the variable +`case-fold-search', which see. + +See also the functions `match-beginning', `match-end' and `replace-match'. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (string, bound, noerror, count, 1, 0, 0); } DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4, - "sWord search backward: ", - "Search backward from point for STRING, ignoring differences in punctuation.\n\ -Set point to the beginning of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend before that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.") - (string, bound, noerror, count) + "sWord search backward: ", + doc: /* Search backward from point for STRING, ignoring differences in punctuation. +Set point to the beginning of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend before that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (wordify (string), bound, noerror, count, -1, 1, 0); } DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4, - "sWord search: ", - "Search forward from point for STRING, ignoring differences in punctuation.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.") - (string, bound, noerror, count) + "sWord search: ", + doc: /* Search forward from point for STRING, ignoring differences in punctuation. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (wordify (string), bound, noerror, count, 1, 1, 0); } DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4, - "sRE search backward: ", - "Search backward from point for match for regular expression REGEXP.\n\ -Set point to the beginning of the match, and return point.\n\ -The match found is the one starting last in the buffer\n\ -and yet ending before the origin of the search.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must start at or after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sRE search backward: ", + doc: /* Search backward from point for match for regular expression REGEXP. +Set point to the beginning of the match, and return point. +The match found is the one starting last in the buffer +and yet ending before the origin of the search. +An optional second argument bounds the search; it is a buffer position. +The match found must start at or after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, -1, 1, 0); } DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4, - "sRE search: ", - "Search forward from point for regular expression REGEXP.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sRE search: ", + doc: /* Search forward from point for regular expression REGEXP. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, 1, 1, 0); } DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4, - "sPosix search backward: ", - "Search backward from point for match for regular expression REGEXP.\n\ -Find the longest match in accord with Posix regular expression rules.\n\ -Set point to the beginning of the match, and return point.\n\ -The match found is the one starting last in the buffer\n\ -and yet ending before the origin of the search.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must start at or after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sPosix search backward: ", + doc: /* Search backward from point for match for regular expression REGEXP. +Find the longest match in accord with Posix regular expression rules. +Set point to the beginning of the match, and return point. +The match found is the one starting last in the buffer +and yet ending before the origin of the search. +An optional second argument bounds the search; it is a buffer position. +The match found must start at or after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, -1, 1, 1); } DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4, - "sPosix search: ", - "Search forward from point for regular expression REGEXP.\n\ -Find the longest match in accord with Posix regular expression rules.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sPosix search: ", + doc: /* Search forward from point for regular expression REGEXP. +Find the longest match in accord with Posix regular expression rules. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, 1, 1, 1); } DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0, - "Replace text matched by last search with NEWTEXT.\n\ -If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\ -Otherwise maybe capitalize the whole text, or maybe just word initials,\n\ -based on the replaced text.\n\ -If the replaced text has only capital letters\n\ -and has at least one multiletter word, convert NEWTEXT to all caps.\n\ -If the replaced text has at least one word starting with a capital letter,\n\ -then capitalize each word in NEWTEXT.\n\n\ -If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\ -Otherwise treat `\\' as special:\n\ - `\\&' in NEWTEXT means substitute original matched text.\n\ - `\\N' means substitute what matched the Nth `\\(...\\)'.\n\ - If Nth parens didn't match, substitute nothing.\n\ - `\\\\' means insert one `\\'.\n\ -FIXEDCASE and LITERAL are optional arguments.\n\ -Leaves point at end of replacement text.\n\ -\n\ -The optional fourth argument STRING can be a string to modify.\n\ -In that case, this function creates and returns a new string\n\ -which is made by replacing the part of STRING that was matched.\n\ -\n\ -The optional fifth argument SUBEXP specifies a subexpression of the match.\n\ -It says to replace just that subexpression instead of the whole match.\n\ -This is useful only after a regular expression search or match\n\ -since only regular expressions have distinguished subexpressions.") - (newtext, fixedcase, literal, string, subexp) + doc: /* Replace text matched by last search with NEWTEXT. +Leave point at the end of the replacement text. + +If second arg FIXEDCASE is non-nil, do not alter case of replacement text. +Otherwise maybe capitalize the whole text, or maybe just word initials, +based on the replaced text. +If the replaced text has only capital letters +and has at least one multiletter word, convert NEWTEXT to all caps. +Otherwise if all words are capitalized in the replaced text, +capitalize each word in NEWTEXT. + +If third arg LITERAL is non-nil, insert NEWTEXT literally. +Otherwise treat `\\' as special: + `\\&' in NEWTEXT means substitute original matched text. + `\\N' means substitute what matched the Nth `\\(...\\)'. + If Nth parens didn't match, substitute nothing. + `\\\\' means insert one `\\'. +Case conversion does not apply to these substitutions. + +FIXEDCASE and LITERAL are optional arguments. + +The optional fourth argument STRING can be a string to modify. +This is meaningful when the previous match was done against STRING, +using `string-match'. When used this way, `replace-match' +creates and returns a new string made by copying STRING and replacing +the part of STRING that was matched. + +The optional fifth argument SUBEXP specifies a subexpression; +it says to replace just that subexpression with NEWTEXT, +rather than replacing the entire matched text. +This is, in a vague sense, the inverse of using `\\N' in NEWTEXT; +`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts +NEWTEXT in place of subexp N. +This is useful only after a regular expression search or match, +since only regular expressions have distinguished subexpressions. */) + (newtext, fixedcase, literal, string, subexp) Lisp_Object newtext, fixedcase, literal, string, subexp; { enum { nochange, all_caps, cap_initial } case_action; - register int pos, last; + register int pos, pos_byte; int some_multiletter_word; int some_lowercase; int some_uppercase; int some_nonuppercase_initial; register int c, prevc; - int inslen; int sub; int opoint, newpoint; - CHECK_STRING (newtext, 0); + CHECK_STRING (newtext); if (! NILP (string)) - CHECK_STRING (string, 4); + CHECK_STRING (string); case_action = nochange; /* We tried an initialization */ /* but some C compilers blew it */ @@ -2188,7 +2227,7 @@ since only regular expressions have distinguished subexpressions.") sub = 0; else { - CHECK_NUMBER (subexp, 3); + CHECK_NUMBER (subexp); sub = XINT (subexp); if (sub < 0 || sub >= search_regs.num_regs) args_out_of_range (subexp, make_number (search_regs.num_regs)); @@ -2206,25 +2245,23 @@ since only regular expressions have distinguished subexpressions.") { if (search_regs.start[sub] < 0 || search_regs.start[sub] > search_regs.end[sub] - || search_regs.end[sub] > XSTRING (string)->size) + || search_regs.end[sub] > SCHARS (string)) args_out_of_range (make_number (search_regs.start[sub]), make_number (search_regs.end[sub])); } if (NILP (fixedcase)) { - int beg; /* Decide how to casify by examining the matched text. */ + int last; - if (NILP (string)) - last = CHAR_TO_BYTE (search_regs.end[sub]); - else - last = search_regs.end[sub]; + pos = search_regs.start[sub]; + last = search_regs.end[sub]; if (NILP (string)) - beg = CHAR_TO_BYTE (search_regs.start[sub]); + pos_byte = CHAR_TO_BYTE (pos); else - beg = search_regs.start[sub]; + pos_byte = string_char_to_byte (string, pos); prevc = '\n'; case_action = all_caps; @@ -2236,12 +2273,15 @@ since only regular expressions have distinguished subexpressions.") some_nonuppercase_initial = 0; some_uppercase = 0; - for (pos = beg; pos < last; pos++) + while (pos < last) { if (NILP (string)) - c = FETCH_BYTE (pos); + { + c = FETCH_CHAR (pos_byte); + INC_BOTH (pos, pos_byte); + } else - c = XSTRING (string)->data[pos]; + FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte); if (LOWERCASEP (c)) { @@ -2300,19 +2340,19 @@ since only regular expressions have distinguished subexpressions.") if desired. */ if (NILP (literal)) { - int lastpos = -1; - int lastpos_byte = -1; + int lastpos = 0; + int lastpos_byte = 0; /* We build up the substituted string in ACCUM. */ Lisp_Object accum; Lisp_Object middle; - int pos_byte; + int length = SBYTES (newtext); accum = Qnil; - for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));) + for (pos_byte = 0, pos = 0; pos_byte < length;) { int substart = -1; - int subend; + int subend = 0; int delbackslash = 0; FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); @@ -2320,18 +2360,27 @@ since only regular expressions have distinguished subexpressions.") if (c == '\\') { FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); + if (c == '&') { substart = search_regs.start[sub]; subend = search_regs.end[sub]; } - else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0') + else if (c >= '1' && c <= '9') { - if (search_regs.start[c - '0'] >= 0) + if (search_regs.start[c - '0'] >= 0 + && c <= search_regs.num_regs + '0') { substart = search_regs.start[c - '0']; subend = search_regs.end[c - '0']; } + else + { + /* If that subexp did not match, + replace \\N with nothing. */ + substart = 0; + subend = 0; + } } else if (c == '\\') delbackslash = 1; @@ -2340,10 +2389,10 @@ since only regular expressions have distinguished subexpressions.") } if (substart >= 0) { - if (pos - 1 != lastpos + 1) - middle = substring_both (newtext, lastpos + 1, - lastpos_byte + 1, - pos - 1, pos_byte - 1); + if (pos - 2 != lastpos) + middle = substring_both (newtext, lastpos, + lastpos_byte, + pos - 2, pos_byte - 2); else middle = Qnil; accum = concat3 (accum, middle, @@ -2355,9 +2404,9 @@ since only regular expressions have distinguished subexpressions.") } else if (delbackslash) { - middle = substring_both (newtext, lastpos + 1, - lastpos_byte + 1, - pos, pos_byte); + middle = substring_both (newtext, lastpos, + lastpos_byte, + pos - 1, pos_byte - 1); accum = concat2 (accum, middle); lastpos = pos; @@ -2365,9 +2414,9 @@ since only regular expressions have distinguished subexpressions.") } } - if (pos != lastpos + 1) - middle = substring_both (newtext, lastpos + 1, - lastpos_byte + 1, + if (pos != lastpos) + middle = substring_both (newtext, lastpos, + lastpos_byte, pos, pos_byte); else middle = Qnil; @@ -2384,66 +2433,173 @@ since only regular expressions have distinguished subexpressions.") return concat3 (before, newtext, after); } - /* Record point, the move (quietly) to the start of the match. */ - if (PT > search_regs.start[sub]) + /* Record point, then move (quietly) to the start of the match. */ + if (PT >= search_regs.end[sub]) opoint = PT - ZV; + else if (PT > search_regs.start[sub]) + opoint = search_regs.end[sub] - ZV; else opoint = PT; - TEMP_SET_PT (search_regs.start[sub]); - - /* We insert the replacement text before the old text, and then - delete the original text. This means that markers at the - beginning or end of the original will float to the corresponding - position in the replacement. */ - if (!NILP (literal)) - Finsert_and_inherit (1, &newtext); - else + /* If we want non-literal replacement, + perform substitution on the replacement string. */ + if (NILP (literal)) { - struct gcpro gcpro1; - GCPRO1 (newtext); - - for (pos = 0; pos < XSTRING (newtext)->size; pos++) + int length = SBYTES (newtext); + unsigned char *substed; + int substed_alloc_size, substed_len; + int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters); + int str_multibyte = STRING_MULTIBYTE (newtext); + Lisp_Object rev_tbl; + int really_changed = 0; + + rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table) + ? Fchar_table_extra_slot (Vnonascii_translation_table, + make_number (0)) + : Qnil); + + substed_alloc_size = length * 2 + 100; + substed = (unsigned char *) xmalloc (substed_alloc_size + 1); + substed_len = 0; + + /* Go thru NEWTEXT, producing the actual text to insert in + SUBSTED while adjusting multibyteness to that of the current + buffer. */ + + for (pos_byte = 0, pos = 0; pos_byte < length;) { - int offset = PT - search_regs.start[sub]; + unsigned char str[MAX_MULTIBYTE_LENGTH]; + unsigned char *add_stuff = NULL; + int add_len = 0; + int idx = -1; + + if (str_multibyte) + { + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte); + if (!buf_multibyte) + c = multibyte_char_to_unibyte (c, rev_tbl); + } + else + { + /* Note that we don't have to increment POS. */ + c = SREF (newtext, pos_byte++); + if (buf_multibyte) + c = unibyte_char_to_multibyte (c); + } + + /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED, + or set IDX to a match index, which means put that part + of the buffer text into SUBSTED. */ - c = XSTRING (newtext)->data[pos]; if (c == '\\') { - c = XSTRING (newtext)->data[++pos]; + really_changed = 1; + + if (str_multibyte) + { + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, + pos, pos_byte); + if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c)) + c = multibyte_char_to_unibyte (c, rev_tbl); + } + else + { + c = SREF (newtext, pos_byte++); + if (buf_multibyte) + c = unibyte_char_to_multibyte (c); + } + if (c == '&') - Finsert_buffer_substring - (Fcurrent_buffer (), - make_number (search_regs.start[sub] + offset), - make_number (search_regs.end[sub] + offset)); + idx = sub; else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0') { if (search_regs.start[c - '0'] >= 1) - Finsert_buffer_substring - (Fcurrent_buffer (), - make_number (search_regs.start[c - '0'] + offset), - make_number (search_regs.end[c - '0'] + offset)); + idx = c - '0'; } else if (c == '\\') - insert_char (c); + add_len = 1, add_stuff = "\\"; else - error ("Invalid use of `\\' in replacement text"); + { + xfree (substed); + error ("Invalid use of `\\' in replacement text"); + } } else - insert_char (c); + { + add_len = CHAR_STRING (c, str); + add_stuff = str; + } + + /* If we want to copy part of a previous match, + set up ADD_STUFF and ADD_LEN to point to it. */ + if (idx >= 0) + { + int begbyte = CHAR_TO_BYTE (search_regs.start[idx]); + add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte; + if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx]) + move_gap (search_regs.start[idx]); + add_stuff = BYTE_POS_ADDR (begbyte); + } + + /* Now the stuff we want to add to SUBSTED + is invariably ADD_LEN bytes starting at ADD_STUFF. */ + + /* Make sure SUBSTED is big enough. */ + if (substed_len + add_len >= substed_alloc_size) + { + substed_alloc_size = substed_len + add_len + 500; + substed = (unsigned char *) xrealloc (substed, + substed_alloc_size + 1); + } + + /* Now add to the end of SUBSTED. */ + if (add_stuff) + { + bcopy (add_stuff, substed + substed_len, add_len); + substed_len += add_len; + } + } + + if (really_changed) + { + if (buf_multibyte) + { + int nchars = multibyte_chars_in_text (substed, substed_len); + + newtext = make_multibyte_string (substed, nchars, substed_len); + } + else + newtext = make_unibyte_string (substed, substed_len); } - UNGCPRO; + xfree (substed); } - inslen = PT - (search_regs.start[sub]); - del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen); + /* Replace the old text with the new in the cleanest possible way. */ + replace_range (search_regs.start[sub], search_regs.end[sub], + newtext, 1, 0, 1); + newpoint = search_regs.start[sub] + SCHARS (newtext); if (case_action == all_caps) - Fupcase_region (make_number (PT - inslen), make_number (PT)); + Fupcase_region (make_number (search_regs.start[sub]), + make_number (newpoint)); else if (case_action == cap_initial) - Fupcase_initials_region (make_number (PT - inslen), make_number (PT)); + Fupcase_initials_region (make_number (search_regs.start[sub]), + make_number (newpoint)); + + /* Adjust search data for this change. */ + { + int oldend = search_regs.end[sub]; + int change = newpoint - search_regs.end[sub]; + int i; - newpoint = PT; + for (i = 0; i < search_regs.num_regs; i++) + { + if (search_regs.start[i] > oldend) + search_regs.start[i] += change; + if (search_regs.end[i] > oldend) + search_regs.end[i] += change; + } + } /* Put point back where it was in the text. */ if (opoint <= 0) @@ -2453,7 +2609,7 @@ since only regular expressions have distinguished subexpressions.") /* Now move point "officially" to the start of the inserted replacement. */ move_if_not_intangible (newpoint); - + return Qnil; } @@ -2464,7 +2620,7 @@ match_limit (num, beginningp) { register int n; - CHECK_NUMBER (num, 0); + CHECK_NUMBER (num); n = XINT (num); if (n < 0 || n >= search_regs.num_regs) args_out_of_range (num, make_number (search_regs.num_regs)); @@ -2476,43 +2632,45 @@ match_limit (num, beginningp) } DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0, - "Return position of start of text matched by last search.\n\ -SUBEXP, a number, specifies which parenthesized expression in the last\n\ - regexp.\n\ -Value is nil if SUBEXPth pair didn't match, or there were less than\n\ - SUBEXP pairs.\n\ -Zero means the entire text matched by the whole regexp or whole string.") - (subexp) + doc: /* Return position of start of text matched by last search. +SUBEXP, a number, specifies which parenthesized expression in the last + regexp. +Value is nil if SUBEXPth pair didn't match, or there were less than + SUBEXP pairs. +Zero means the entire text matched by the whole regexp or whole string. */) + (subexp) Lisp_Object subexp; { return match_limit (subexp, 1); } DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0, - "Return position of end of text matched by last search.\n\ -SUBEXP, a number, specifies which parenthesized expression in the last\n\ - regexp.\n\ -Value is nil if SUBEXPth pair didn't match, or there were less than\n\ - SUBEXP pairs.\n\ -Zero means the entire text matched by the whole regexp or whole string.") - (subexp) + doc: /* Return position of end of text matched by last search. +SUBEXP, a number, specifies which parenthesized expression in the last + regexp. +Value is nil if SUBEXPth pair didn't match, or there were less than + SUBEXP pairs. +Zero means the entire text matched by the whole regexp or whole string. */) + (subexp) Lisp_Object subexp; { return match_limit (subexp, 0); -} +} DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0, - "Return a list containing all info on what the last search matched.\n\ -Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\ -All the elements are markers or nil (nil if the Nth pair didn't match)\n\ -if the last match was on a buffer; integers or nil if a string was matched.\n\ -Use `store-match-data' to reinstate the data in this list.\n\ -\n\ -If INTEGERS (the optional first argument) is non-nil, always use integers\n\ -\(rather than markers) to represent buffer positions.\n\ -If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\ -to hold all the values, and if INTEGERS is non-nil, no consing is done.") - (integers, reuse) + doc: /* Return a list containing all info on what the last search matched. +Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. +All the elements are markers or nil (nil if the Nth pair didn't match) +if the last match was on a buffer; integers or nil if a string was matched. +Use `store-match-data' to reinstate the data in this list. + +If INTEGERS (the optional first argument) is non-nil, always use integers +\(rather than markers) to represent buffer positions. +If REUSE is a list, reuse it as part of the value. If REUSE is long enough +to hold all the values, and if INTEGERS is non-nil, no consing is done. + +Return value is undefined if the last search failed. */) + (integers, reuse) Lisp_Object integers, reuse; { Lisp_Object tail, prev; @@ -2522,6 +2680,8 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.") if (NILP (last_thing_searched)) return Qnil; + prev = Qnil; + data = (Lisp_Object *) alloca ((2 * search_regs.num_regs) * sizeof (Lisp_Object)); @@ -2545,7 +2705,7 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.") last_thing_searched); data[2 * i + 1] = Fmake_marker (); Fset_marker (data[2 * i + 1], - make_number (search_regs.end[i]), + make_number (search_regs.end[i]), last_thing_searched); } else @@ -2565,28 +2725,28 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.") /* If REUSE is a list, store as many value elements as will fit into the elements of REUSE. */ for (i = 0, tail = reuse; CONSP (tail); - i++, tail = XCONS (tail)->cdr) + i++, tail = XCDR (tail)) { if (i < 2 * len + 2) - XCONS (tail)->car = data[i]; + XSETCAR (tail, data[i]); else - XCONS (tail)->car = Qnil; + XSETCAR (tail, Qnil); prev = tail; } /* If we couldn't fit all value elements into REUSE, cons up the rest of them and add them to the end of REUSE. */ if (i < 2 * len + 2) - XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i); + XSETCDR (prev, Flist (2 * len + 2 - i, data + i)); return reuse; } DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0, - "Set internal data on last search match from elements of LIST.\n\ -LIST should have been created by calling `match-data' previously.") - (list) + doc: /* Set internal data on last search match from elements of LIST. +LIST should have been created by calling `match-data' previously. */) + (list) register Lisp_Object list; { register int i; @@ -2598,7 +2758,7 @@ LIST should have been created by calling `match-data' previously.") if (!CONSP (list) && !NILP (list)) list = wrong_type_argument (Qconsp, list); - /* Unless we find a marker with a buffer in LIST, assume that this + /* Unless we find a marker with a buffer in LIST, assume that this match data came from a string. */ last_thing_searched = Qt; @@ -2625,6 +2785,9 @@ LIST should have been created by calling `match-data' previously.") length * sizeof (regoff_t)); } + for (i = search_regs.num_regs; i < length; i++) + search_regs.start[i] = -1; + search_regs.num_regs = length; } } @@ -2639,6 +2802,8 @@ LIST should have been created by calling `match-data' previously.") } else { + int from; + if (MARKERP (marker)) { if (XMARKER (marker)->buffer == 0) @@ -2647,21 +2812,22 @@ LIST should have been created by calling `match-data' previously.") XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer); } - CHECK_NUMBER_COERCE_MARKER (marker, 0); - search_regs.start[i] = XINT (marker); + CHECK_NUMBER_COERCE_MARKER (marker); + from = XINT (marker); list = Fcdr (list); marker = Fcar (list); if (MARKERP (marker) && XMARKER (marker)->buffer == 0) XSETFASTINT (marker, 0); - CHECK_NUMBER_COERCE_MARKER (marker, 0); + CHECK_NUMBER_COERCE_MARKER (marker); + search_regs.start[i] = from; search_regs.end[i] = XINT (marker); } list = Fcdr (list); } - return Qnil; + return Qnil; } /* If non-zero the match data have been saved in saved_search_regs @@ -2709,23 +2875,23 @@ restore_match_data () /* Quote a string to inactivate reg-expr chars */ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, - "Return a regexp string which matches exactly STRING and nothing else.") - (string) + doc: /* Return a regexp string which matches exactly STRING and nothing else. */) + (string) Lisp_Object string; { register unsigned char *in, *out, *end; register unsigned char *temp; int backslashes_added = 0; - CHECK_STRING (string, 0); + CHECK_STRING (string); - temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2); + temp = (unsigned char *) alloca (SBYTES (string) * 2); /* Now copy the data into the new string, inserting escapes. */ - in = XSTRING (string)->data; - end = in + STRING_BYTES (XSTRING (string)); - out = temp; + in = SDATA (string); + end = in + SBYTES (string); + out = temp; for (; in != end; in++) { @@ -2738,11 +2904,11 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, } return make_specified_string (temp, - XSTRING (string)->size + backslashes_added, + SCHARS (string) + backslashes_added, out - temp, STRING_MULTIBYTE (string)); } - + void syms_of_search () { @@ -2751,7 +2917,7 @@ syms_of_search () for (i = 0; i < REGEXP_CACHE_SIZE; ++i) { searchbufs[i].buf.allocated = 100; - searchbufs[i].buf.buffer = (unsigned char *) malloc (100); + searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100); searchbufs[i].buf.fastmap = searchbufs[i].fastmap; searchbufs[i].regexp = Qnil; staticpro (&searchbufs[i].regexp); @@ -2796,3 +2962,6 @@ syms_of_search () defsubr (&Sset_match_data); defsubr (&Sregexp_quote); } + +/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f + (do not change this comment) */