X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/26aff1500fb8b7d26eaf36399796a9dbea20a106..845975f566069b9b48c21dc2ba239d23c2bd823c:/src/search.c diff --git a/src/search.c b/src/search.c index 26569920b1..722af7afa4 100644 --- a/src/search.c +++ b/src/search.c @@ -1,5 +1,5 @@ /* String search routines for GNU Emacs. - Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc. + Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -94,12 +94,6 @@ matcher_overflow () error ("Stack overflow in regexp matcher"); } -#ifdef __STDC__ -#define CONST const -#else -#define CONST -#endif - /* Compile a regexp and signal a Lisp error if anything goes wrong. PATTERN is the pattern to compile. CP is the place to put the result. @@ -123,7 +117,7 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) int posix; int multibyte; { - char *raw_pattern; + unsigned char *raw_pattern; int raw_pattern_size; char *val; reg_syntax_t old; @@ -134,14 +128,14 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) if (multibyte == STRING_MULTIBYTE (pattern)) { - raw_pattern = (char *) XSTRING (pattern)->data; - raw_pattern_size = XSTRING (pattern)->size_byte; + raw_pattern = (unsigned char *) XSTRING (pattern)->data; + raw_pattern_size = STRING_BYTES (XSTRING (pattern)); } else if (multibyte) { raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data, XSTRING (pattern)->size); - raw_pattern = (char *) alloca (raw_pattern_size + 1); + raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); copy_text (XSTRING (pattern)->data, raw_pattern, XSTRING (pattern)->size, 0, 1); } @@ -154,19 +148,20 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) so that only the multibyte chars which really correspond to the chosen single-byte character set can possibly match. */ raw_pattern_size = XSTRING (pattern)->size; - raw_pattern = (char *) alloca (raw_pattern_size + 1); + raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); copy_text (XSTRING (pattern)->data, raw_pattern, - XSTRING (pattern)->size_byte, 1, 0); + STRING_BYTES (XSTRING (pattern)), 1, 0); } cp->regexp = Qnil; - cp->buf.translate = (! NILP (translate) ? translate : 0); + cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); cp->posix = posix; cp->buf.multibyte = multibyte; BLOCK_INPUT; old = re_set_syntax (RE_SYNTAX_EMACS | (posix ? 0 : RE_NO_POSIX_BACKTRACKING)); - val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf); + val = (char *) re_compile_pattern ((char *)raw_pattern, + raw_pattern_size, &cp->buf); re_set_syntax (old); UNBLOCK_INPUT; if (val) @@ -175,6 +170,23 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte) cp->regexp = Fcopy_sequence (pattern); } +/* Shrink each compiled regexp buffer in the cache + to the size actually used right now. + This is called from garbage collection. */ + +void +shrink_regexp_cache () +{ + struct regexp_cache *cp; + + for (cp = searchbuf_head; cp != 0; cp = cp->next) + { + cp->buf.allocated = cp->buf.used; + cp->buf.buffer + = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used); + } +} + /* Compile a regexp if necessary, but first check to see if there's one in the cache. PATTERN is the pattern to compile. @@ -198,16 +210,27 @@ compile_pattern (pattern, regp, translate, posix, multibyte) for (cpp = &searchbuf_head; ; cpp = &cp->next) { cp = *cpp; + /* Entries are initialized to nil, and may be set to nil by + compile_pattern_1 if the pattern isn't valid. Don't apply + XSTRING in those cases. However, compile_pattern_1 is only + applied to the cache entry we pick here to reuse. So nil + should never appear before a non-nil entry. */ + if (NILP (cp->regexp)) + goto compile_it; if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size + && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) && !NILP (Fstring_equal (cp->regexp, pattern)) - && cp->buf.translate == (! NILP (translate) ? translate : 0) + && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) && cp->posix == posix && cp->buf.multibyte == multibyte) break; - /* If we're at the end of the cache, compile into the last cell. */ + /* If we're at the end of the cache, compile into the nil cell + we found, or the last (least recently used) cell with a + string value. */ if (cp->next == 0) { + compile_it: compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte); break; } @@ -253,7 +276,7 @@ looking_at_1 (string, posix) if (running_asynch_code) save_search_regs (); - CHECK_STRING (string, 0); + CHECK_STRING (string); bufp = compile_pattern (string, &search_regs, (!NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : Qnil), @@ -287,6 +310,8 @@ looking_at_1 (string, posix) i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2, PT_BYTE - BEGV_BYTE, &search_regs, ZV_BYTE - BEGV_BYTE); + immediate_quit = 0; + if (i == -2) matcher_overflow (); @@ -301,28 +326,27 @@ looking_at_1 (string, posix) = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE); } XSETBUFFER (last_thing_searched, current_buffer); - immediate_quit = 0; return val; } DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0, - "Return t if text after point matches regular expression REGEXP.\n\ -This function modifies the match data that `match-beginning',\n\ -`match-end' and `match-data' access; save and restore the match\n\ -data if you want to preserve them.") - (regexp) + doc: /* Return t if text after point matches regular expression REGEXP. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them. */) + (regexp) Lisp_Object regexp; { return looking_at_1 (regexp, 0); } DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0, - "Return t if text after point matches regular expression REGEXP.\n\ -Find the longest match, in accord with Posix regular expression rules.\n\ -This function modifies the match data that `match-beginning',\n\ -`match-end' and `match-data' access; save and restore the match\n\ -data if you want to preserve them.") - (regexp) + doc: /* Return t if text after point matches regular expression REGEXP. +Find the longest match, in accord with Posix regular expression rules. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them. */) + (regexp) Lisp_Object regexp; { return looking_at_1 (regexp, 1); @@ -341,8 +365,8 @@ string_match_1 (regexp, string, start, posix) if (running_asynch_code) save_search_regs (); - CHECK_STRING (regexp, 0); - CHECK_STRING (string, 1); + CHECK_STRING (regexp); + CHECK_STRING (string); if (NILP (start)) pos = 0, pos_byte = 0; @@ -350,7 +374,7 @@ string_match_1 (regexp, string, start, posix) { int len = XSTRING (string)->size; - CHECK_NUMBER (start, 2); + CHECK_NUMBER (start); pos = XINT (start); if (pos < 0 && -pos <= len) pos = len + pos; @@ -368,8 +392,8 @@ string_match_1 (regexp, string, start, posix) re_match_object = string; val = re_search (bufp, (char *) XSTRING (string)->data, - XSTRING (string)->size_byte, pos_byte, - XSTRING (string)->size_byte - pos_byte, + STRING_BYTES (XSTRING (string)), pos_byte, + STRING_BYTES (XSTRING (string)) - pos_byte, &search_regs); immediate_quit = 0; last_thing_searched = Qt; @@ -390,25 +414,27 @@ string_match_1 (regexp, string, start, posix) } DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0, - "Return index of start of first match for REGEXP in STRING, or nil.\n\ -If third arg START is non-nil, start search at that index in STRING.\n\ -For index of first char beyond the match, do (match-end 0).\n\ -`match-end' and `match-beginning' also give indices of substrings\n\ -matched by parenthesis constructs in the pattern.") - (regexp, string, start) + doc: /* Return index of start of first match for REGEXP in STRING, or nil. +Case is ignored if `case-fold-search' is non-nil in the current buffer. +If third arg START is non-nil, start search at that index in STRING. +For index of first char beyond the match, do (match-end 0). +`match-end' and `match-beginning' also give indices of substrings +matched by parenthesis constructs in the pattern. */) + (regexp, string, start) Lisp_Object regexp, string, start; { return string_match_1 (regexp, string, start, 0); } DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0, - "Return index of start of first match for REGEXP in STRING, or nil.\n\ -Find the longest match, in accord with Posix regular expression rules.\n\ -If third arg START is non-nil, start search at that index in STRING.\n\ -For index of first char beyond the match, do (match-end 0).\n\ -`match-end' and `match-beginning' also give indices of substrings\n\ -matched by parenthesis constructs in the pattern.") - (regexp, string, start) + doc: /* Return index of start of first match for REGEXP in STRING, or nil. +Find the longest match, in accord with Posix regular expression rules. +Case is ignored if `case-fold-search' is non-nil in the current buffer. +If third arg START is non-nil, start search at that index in STRING. +For index of first char beyond the match, do (match-end 0). +`match-end' and `match-beginning' also give indices of substrings +matched by parenthesis constructs in the pattern. */) + (regexp, string, start) Lisp_Object regexp, string, start; { return string_match_1 (regexp, string, start, 1); @@ -431,8 +457,8 @@ fast_string_match (regexp, string) re_match_object = string; val = re_search (bufp, (char *) XSTRING (string)->data, - XSTRING (string)->size_byte, 0, XSTRING (string)->size_byte, - 0); + STRING_BYTES (XSTRING (string)), 0, + STRING_BYTES (XSTRING (string)), 0); immediate_quit = 0; return val; } @@ -464,23 +490,6 @@ fast_c_string_match_ignore_case (regexp, string) return val; } -/* max and min. */ - -static int -max (a, b) - int a, b; -{ - return ((a > b) ? a : b); -} - -static int -min (a, b) - int a, b; -{ - return ((a < b) ? a : b); -} - - /* The newline cache: remembering which sections of text have no newlines. */ /* If the user has requested newline caching, make sure it's on. @@ -529,6 +538,7 @@ newline_cache_on_off (buf) If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do except when inside redisplay. */ +int scan_buffer (target, start, end, count, shortage, allow_quit) register int target; int start, end; @@ -568,6 +578,7 @@ scan_buffer (target, start, end, count, shortage, allow_quit) examine. */ int ceiling_byte = CHAR_TO_BYTE (end) - 1; int start_byte = CHAR_TO_BYTE (start); + int tem; /* If we're looking for a newline, consult the newline cache to see where we can avoid some scanning. */ @@ -593,7 +604,8 @@ scan_buffer (target, start, end, count, shortage, allow_quit) bytes. BUFFER_CEILING_OF returns the last character position that is contiguous, so the ceiling is the position after that. */ - ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte); + tem = BUFFER_CEILING_OF (start_byte); + ceiling_byte = min (tem, ceiling_byte); { /* The termination address of the dumb loop. */ @@ -639,6 +651,7 @@ scan_buffer (target, start, end, count, shortage, allow_quit) /* The last character to check before the next obstacle. */ int ceiling_byte = CHAR_TO_BYTE (end); int start_byte = CHAR_TO_BYTE (start); + int tem; /* Consult the newline cache, if appropriate. */ if (target == '\n' && newline_cache) @@ -660,7 +673,8 @@ scan_buffer (target, start, end, count, shortage, allow_quit) } /* Stop scanning before the gap. */ - ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte); + tem = BUFFER_FLOOR_OF (start_byte - 1); + ceiling_byte = max (tem, ceiling_byte); { /* The termination address of the dumb loop. */ @@ -738,11 +752,6 @@ scan_newline (start, start_byte, limit, limit_byte, count, allow_quit) int old_immediate_quit = immediate_quit; - /* If we are not in selective display mode, - check only for newlines. */ - int selective_display = (!NILP (current_buffer->selective_display) - && !INTEGERP (current_buffer->selective_display)); - /* The code that follows is like scan_buffer but checks for either newline or carriage return. */ @@ -786,7 +795,6 @@ scan_newline (start, start_byte, limit, limit_byte, count, allow_quit) } else { - int start_byte = CHAR_TO_BYTE (start); while (start_byte > limit_byte) { ceiling = BUFFER_FLOOR_OF (start_byte - 1); @@ -864,11 +872,11 @@ search_command (string, bound, noerror, count, direction, RE, posix) if (!NILP (count)) { - CHECK_NUMBER (count, 3); + CHECK_NUMBER (count); n *= XINT (count); } - CHECK_STRING (string, 0); + CHECK_STRING (string); if (NILP (bound)) { if (n > 0) @@ -878,7 +886,7 @@ search_command (string, bound, noerror, count, direction, RE, posix) } else { - CHECK_NUMBER_COERCE_MARKER (bound, 1); + CHECK_NUMBER_COERCE_MARKER (bound); lim = XINT (bound); if (n > 0 ? lim < PT : lim > PT) error ("Invalid search bound (wrong side of point)"); @@ -931,9 +939,8 @@ static int trivial_regexp_p (regexp) Lisp_Object regexp; { - int len = XSTRING (regexp)->size_byte; + int len = STRING_BYTES (XSTRING (regexp)); unsigned char *s = XSTRING (regexp)->data; - unsigned char c; while (--len >= 0) { switch (*s++) @@ -947,7 +954,7 @@ trivial_regexp_p (regexp) { case '|': case '(': case ')': case '`': case '\'': case 'b': case 'B': case '<': case '>': case 'w': case 'W': case 's': - case 'S': case '=': + case 'S': case '=': case '{': case '}': case 'c': case 'C': /* for categoryspec and notcategoryspec */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -1005,23 +1012,20 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, int posix; { int len = XSTRING (string)->size; - int len_byte = XSTRING (string)->size_byte; + int len_byte = STRING_BYTES (XSTRING (string)); register int i; if (running_asynch_code) save_search_regs (); + /* Searching 0 times means don't move. */ /* Null string is found at starting position. */ - if (len == 0) + if (len == 0 || n == 0) { - set_search_regs (pos, 0); + set_search_regs (pos_byte, 0); return pos; } - /* Searching 0 times means don't move. */ - if (n == 0) - return pos; - if (RE && !trivial_regexp_p (string)) { unsigned char *p1, *p2; @@ -1134,7 +1138,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, int multibyte = !NILP (current_buffer->enable_multibyte_characters); unsigned char *base_pat = XSTRING (string)->data; int charset_base = -1; - int simple = 1; + int boyer_moore_ok = 1; /* MULTIBYTE says whether the text to be searched is multibyte. We must convert PATTERN to match that, or we will not really @@ -1142,9 +1146,9 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, if (multibyte == STRING_MULTIBYTE (string)) { - raw_pattern = (char *) XSTRING (string)->data; + raw_pattern = (unsigned char *) XSTRING (string)->data; raw_pattern_size = XSTRING (string)->size; - raw_pattern_size_byte = XSTRING (string)->size_byte; + raw_pattern_size_byte = STRING_BYTES (XSTRING (string)); } else if (multibyte) { @@ -1152,7 +1156,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, raw_pattern_size_byte = count_size_as_multibyte (XSTRING (string)->data, raw_pattern_size); - raw_pattern = (char *) alloca (raw_pattern_size_byte + 1); + raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1); copy_text (XSTRING (string)->data, raw_pattern, XSTRING (string)->size, 0, 1); } @@ -1166,9 +1170,9 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, the chosen single-byte character set can possibly match. */ raw_pattern_size = XSTRING (string)->size; raw_pattern_size_byte = XSTRING (string)->size; - raw_pattern = (char *) alloca (raw_pattern_size + 1); + raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1); copy_text (XSTRING (string)->data, raw_pattern, - XSTRING (string)->size_byte, 1, 0); + STRING_BYTES (XSTRING (string)), 1, 0); } /* Copy and optionally translate the pattern. */ @@ -1181,7 +1185,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, { while (--len >= 0) { - unsigned char workbuf[4], *str; + unsigned char str[MAX_MULTIBYTE_LENGTH]; int c, translated, inverse; int in_charlen, charlen; @@ -1196,17 +1200,26 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen); + /* Translate the character, if requested. */ TRANSLATE (translated, trt, c); /* If translation changed the byte-length, go back to the original character. */ - charlen = CHAR_STRING (translated, workbuf, str); + charlen = CHAR_STRING (translated, str); if (in_charlen != charlen) { translated = c; - charlen = CHAR_STRING (c, workbuf, str); + charlen = CHAR_STRING (c, str); } + /* If we are searching for something strange, + an invalid multibyte code, don't use boyer-moore. */ + if (! ASCII_BYTE_P (translated) + && (charlen == 1 /* 8bit code */ + || charlen != in_charlen /* invalid multibyte code */ + )) + boyer_moore_ok = 0; + TRANSLATE (inverse, inverse_trt, c); /* Did this char actually get translated? @@ -1215,15 +1228,13 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, { /* Keep track of which character set row contains the characters that need translation. */ - int charset_base_code = c & ~0xff; + int charset_base_code = c & ~CHAR_FIELD3_MASK; if (charset_base == -1) charset_base = charset_base_code; else if (charset_base != charset_base_code) /* If two different rows appear, needing translation, then we cannot use boyer_moore search. */ - simple = 0; - /* ??? Handa: this must do simple = 0 - if c is a composite character. */ + boyer_moore_ok = 0; } /* Store this character into the translated pattern. */ @@ -1235,9 +1246,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } else { + /* Unibyte buffer. */ + charset_base = 0; while (--len >= 0) { - int c, translated, inverse; + int c, translated; /* If we got here and the RE flag is set, it's because we're dealing with a regexp known to be trivial, so the backslash @@ -1249,22 +1262,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, } c = *base_pat++; TRANSLATE (translated, trt, c); - TRANSLATE (inverse, inverse_trt, c); - - /* Did this char actually get translated? - Would any other char get translated into it? */ - if (translated != c || inverse != c) - { - /* Keep track of which character set row - contains the characters that need translation. */ - int charset_base_code = c & ~0xff; - if (charset_base == -1) - charset_base = charset_base_code; - else if (charset_base != charset_base_code) - /* If two different rows appear, needing translation, - then we cannot use boyer_moore search. */ - simple = 0; - } *pat++ = translated; } } @@ -1273,7 +1270,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n, len = raw_pattern_size; pat = base_pat = patbuf; - if (simple) + if (boyer_moore_ok) return boyer_moore (n, pat, len, len_byte, trt, inverse_trt, pos, pos_byte, lim, lim_byte, charset_base); @@ -1305,6 +1302,7 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) int lim, lim_byte; { int multibyte = ! NILP (current_buffer->enable_multibyte_characters); + int forward = n > 0; if (lim > pos && multibyte) while (n > 0) @@ -1323,22 +1321,23 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) while (this_len > 0) { int charlen, buf_charlen; - int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); - int buf_ch; - - this_len_byte -= charlen; - this_len--; - p += charlen; + int pat_ch, buf_ch; + pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), ZV_BYTE - this_pos_byte, buf_charlen); - this_pos_byte += buf_charlen; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + + this_len_byte -= charlen; + this_len--; + p += charlen; + + this_pos_byte += buf_charlen; + this_pos++; } if (this_len == 0) @@ -1370,12 +1369,13 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) { int pat_ch = *p++; int buf_ch = FETCH_BYTE (this_pos); - this_len--; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + + this_len--; + this_pos++; } if (this_len == 0) @@ -1408,22 +1408,22 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) while (this_len > 0) { int charlen, buf_charlen; - int pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); - int buf_ch; - - this_len_byte -= charlen; - this_len--; - p += charlen; + int pat_ch, buf_ch; + pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen); buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte), ZV_BYTE - this_pos_byte, buf_charlen); - this_pos_byte += buf_charlen; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + + this_len_byte -= charlen; + this_len--; + p += charlen; + this_pos_byte += buf_charlen; + this_pos++; } if (this_len == 0) @@ -1455,12 +1455,12 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) { int pat_ch = *p++; int buf_ch = FETCH_BYTE (this_pos); - this_len--; - this_pos++; TRANSLATE (buf_ch, trt, buf_ch); if (buf_ch != pat_ch) break; + this_len--; + this_pos++; } if (this_len == 0) @@ -1478,7 +1478,10 @@ simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte) stop: if (n == 0) { - set_search_regs (multibyte ? pos_byte : pos, len_byte); + if (forward) + set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte); + else + set_search_regs (multibyte ? pos_byte : pos, len_byte); return pos; } @@ -1515,17 +1518,17 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, { int direction = ((n > 0) ? 1 : -1); register int dirlen; - int infinity, limit, k, stride_for_teases; + int infinity, limit, stride_for_teases = 0; register int *BM_tab; int *BM_tab_base; register unsigned char *cursor, *p_limit; register int i, j; - unsigned char *pat; + unsigned char *pat, *pat_end; int multibyte = ! NILP (current_buffer->enable_multibyte_characters); unsigned char simple_translate[0400]; - int translate_prev_byte; - int translate_anteprev_byte; + int translate_prev_byte = 0; + int translate_anteprev_byte = 0; #ifdef C_ALLOCA int BM_tab_space[0400]; @@ -1562,10 +1565,15 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, dirlen = len_byte * direction; infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction; + + /* Record position after the end of the pattern. */ + pat_end = base_pat + len_byte; + /* BASE_PAT points to a character that we start scanning from. + It is the first character in a forward search, + the last character in a backward search. */ if (direction < 0) - pat = (base_pat += len_byte - 1); - else - pat = base_pat; + base_pat = pat_end - 1; + BM_tab_base = BM_tab; BM_tab += 0400; j = dirlen; /* to get it in a register */ @@ -1589,7 +1597,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, i = 0; while (i != infinity) { - unsigned char *ptr = pat + i; + unsigned char *ptr = base_pat + i; i += direction; if (i == dirlen) i = infinity; @@ -1600,15 +1608,16 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, int this_translated = 1; if (multibyte - && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1]))) + /* Is *PTR the last byte of a character? */ + && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1]))) { unsigned char *charstart = ptr; while (! CHAR_HEAD_P (*charstart)) charstart--; untranslated = STRING_CHAR (charstart, ptr - charstart + 1); - TRANSLATE (ch, trt, untranslated); - if (charset_base == (ch & ~0xff)) + if (charset_base == (untranslated & ~CHAR_FIELD3_MASK)) { + TRANSLATE (ch, trt, untranslated); if (! CHAR_HEAD_P (*ptr)) { translate_prev_byte = ptr[-1]; @@ -1617,7 +1626,10 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, } } else - this_translated = 0; + { + this_translated = 0; + ch = *ptr; + } } else if (!multibyte) TRANSLATE (ch, trt, *ptr); @@ -1627,23 +1639,38 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, this_translated = 0; } - k = j = (unsigned char) ch; + if (ch > 0400) + j = ((unsigned char) ch) | 0200; + else + j = (unsigned char) ch; + if (i == infinity) stride_for_teases = BM_tab[j]; + BM_tab[j] = dirlen - i; /* A translation table is accompanied by its inverse -- see */ /* comment following downcase_table for details */ if (this_translated) - while (1) - { - TRANSLATE (ch, inverse_trt, ch); - /* For all the characters that map into K, - set up simple_translate to map them into K. */ - simple_translate[(unsigned char) ch] = k; - if ((unsigned char) ch == k) - break; - BM_tab[(unsigned char) ch] = dirlen - i; - } + { + int starting_ch = ch; + int starting_j = j; + while (1) + { + TRANSLATE (ch, inverse_trt, ch); + if (ch > 0400) + j = ((unsigned char) ch) | 0200; + else + j = (unsigned char) ch; + + /* For all the characters that map into CH, + set up simple_translate to map the last byte + into STARTING_J. */ + simple_translate[j] = starting_j; + if (ch == starting_ch) + break; + BM_tab[j] = dirlen - i; + } + } } else { @@ -1678,14 +1705,22 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, QUIT; pat = base_pat; limit = pos_byte - dirlen + direction; - limit = ((direction > 0) - ? BUFFER_CEILING_OF (limit) - : BUFFER_FLOOR_OF (limit)); - /* LIMIT is now the last (not beyond-last!) value POS_BYTE - can take on without hitting edge of buffer or the gap. */ - limit = ((direction > 0) - ? min (lim_byte - 1, min (limit, pos_byte + 20000)) - : max (lim_byte, max (limit, pos_byte - 20000))); + if (direction > 0) + { + limit = BUFFER_CEILING_OF (limit); + /* LIMIT is now the last (not beyond-last!) value POS_BYTE + can take on without hitting edge of buffer or the gap. */ + limit = min (limit, pos_byte + 20000); + limit = min (limit, lim_byte - 1); + } + else + { + limit = BUFFER_FLOOR_OF (limit); + /* LIMIT is now the last (not beyond-last!) value POS_BYTE + can take on without hitting edge of buffer or the gap. */ + limit = max (limit, pos_byte - 20000); + limit = max (limit, lim_byte); + } tail_end = BUFFER_CEILING_OF (pos_byte) + 1; tail_end_ptr = BYTE_POS_ADDR (tail_end); @@ -1864,12 +1899,15 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt, } /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES - for a match just found in the current buffer. */ + for the overall match just found in the current buffer. + Also clear out the match data for registers 1 and up. */ static void set_search_regs (beg_byte, nbytes) int beg_byte, nbytes; { + int i; + /* Make sure we have registers in which to store the match position. */ if (search_regs.num_regs == 0) @@ -1879,6 +1917,13 @@ set_search_regs (beg_byte, nbytes) search_regs.num_regs = 2; } + /* Clear out the other registers. */ + for (i = 1; i < search_regs.num_regs; i++) + { + search_regs.start[i] = -1; + search_regs.end[i] = -1; + } + search_regs.start[0] = BYTE_TO_CHAR (beg_byte); search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes); XSETBUFFER (last_thing_searched, current_buffer); @@ -1898,7 +1943,7 @@ wordify (string) int prev_c = 0; int adjust; - CHECK_STRING (string, 0); + CHECK_STRING (string); p = XSTRING (string)->data; len = XSTRING (string)->size; @@ -1906,10 +1951,7 @@ wordify (string) { int c; - if (STRING_MULTIBYTE (string)) - FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); - else - c = XSTRING (string)->data[i++]; + FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); if (SYNTAX (c) != Sword) { @@ -1924,27 +1966,45 @@ wordify (string) if (SYNTAX (prev_c) == Sword) word_count++; if (!word_count) - return build_string (""); + return empty_string; adjust = - punct_count + 5 * (word_count - 1) + 4; - val = make_uninit_multibyte_string (len + adjust, - XSTRING (string)->size_byte + adjust); + if (STRING_MULTIBYTE (string)) + val = make_uninit_multibyte_string (len + adjust, + STRING_BYTES (XSTRING (string)) + + adjust); + else + val = make_uninit_string (len + adjust); o = XSTRING (val)->data; *o++ = '\\'; *o++ = 'b'; + prev_c = 0; - for (i = 0; i < XSTRING (val)->size_byte; i++) - if (SYNTAX (p[i]) == Sword) - *o++ = p[i]; - else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count) - { - *o++ = '\\'; - *o++ = 'W'; - *o++ = '\\'; - *o++ = 'W'; - *o++ = '*'; - } + for (i = 0, i_byte = 0; i < len; ) + { + int c; + int i_byte_orig = i_byte; + + FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte); + + if (SYNTAX (c) == Sword) + { + bcopy (&XSTRING (string)->data[i_byte_orig], o, + i_byte - i_byte_orig); + o += i_byte - i_byte_orig; + } + else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count) + { + *o++ = '\\'; + *o++ = 'W'; + *o++ = '\\'; + *o++ = 'W'; + *o++ = '*'; + } + + prev_c = c; + } *o++ = '\\'; *o++ = 'b'; @@ -1953,181 +2013,199 @@ wordify (string) } DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4, - "MSearch backward: ", - "Search backward from point for STRING.\n\ -Set point to the beginning of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend before that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, position at limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (string, bound, noerror, count) + "MSearch backward: ", + doc: /* Search backward from point for STRING. +Set point to the beginning of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend before that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, position at limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. + +Search case-sensitivity is determined by the value of the variable +`case-fold-search', which see. + +See also the functions `match-beginning', `match-end' and `replace-match'. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (string, bound, noerror, count, -1, 0, 0); } DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ", - "Search forward from point for STRING.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position. nil is equivalent\n\ - to (point-max).\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (string, bound, noerror, count) + doc: /* Search forward from point for STRING. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. nil is equivalent + to (point-max). +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. + +Search case-sensitivity is determined by the value of the variable +`case-fold-search', which see. + +See also the functions `match-beginning', `match-end' and `replace-match'. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (string, bound, noerror, count, 1, 0, 0); } DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4, - "sWord search backward: ", - "Search backward from point for STRING, ignoring differences in punctuation.\n\ -Set point to the beginning of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend before that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.") - (string, bound, noerror, count) + "sWord search backward: ", + doc: /* Search backward from point for STRING, ignoring differences in punctuation. +Set point to the beginning of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend before that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (wordify (string), bound, noerror, count, -1, 1, 0); } DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4, - "sWord search: ", - "Search forward from point for STRING, ignoring differences in punctuation.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.") - (string, bound, noerror, count) + "sWord search: ", + doc: /* Search forward from point for STRING, ignoring differences in punctuation. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. */) + (string, bound, noerror, count) Lisp_Object string, bound, noerror, count; { return search_command (wordify (string), bound, noerror, count, 1, 1, 0); } DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4, - "sRE search backward: ", - "Search backward from point for match for regular expression REGEXP.\n\ -Set point to the beginning of the match, and return point.\n\ -The match found is the one starting last in the buffer\n\ -and yet ending before the origin of the search.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must start at or after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sRE search backward: ", + doc: /* Search backward from point for match for regular expression REGEXP. +Set point to the beginning of the match, and return point. +The match found is the one starting last in the buffer +and yet ending before the origin of the search. +An optional second argument bounds the search; it is a buffer position. +The match found must start at or after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, -1, 1, 0); } DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4, - "sRE search: ", - "Search forward from point for regular expression REGEXP.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sRE search: ", + doc: /* Search forward from point for regular expression REGEXP. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, 1, 1, 0); } DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4, - "sPosix search backward: ", - "Search backward from point for match for regular expression REGEXP.\n\ -Find the longest match in accord with Posix regular expression rules.\n\ -Set point to the beginning of the match, and return point.\n\ -The match found is the one starting last in the buffer\n\ -and yet ending before the origin of the search.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must start at or after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sPosix search backward: ", + doc: /* Search backward from point for match for regular expression REGEXP. +Find the longest match in accord with Posix regular expression rules. +Set point to the beginning of the match, and return point. +The match found is the one starting last in the buffer +and yet ending before the origin of the search. +An optional second argument bounds the search; it is a buffer position. +The match found must start at or after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, -1, 1, 1); } DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4, - "sPosix search: ", - "Search forward from point for regular expression REGEXP.\n\ -Find the longest match in accord with Posix regular expression rules.\n\ -Set point to the end of the occurrence found, and return point.\n\ -An optional second argument bounds the search; it is a buffer position.\n\ -The match found must not extend after that position.\n\ -Optional third argument, if t, means if fail just return nil (no error).\n\ - If not nil and not t, move to limit of search and return nil.\n\ -Optional fourth argument is repeat count--search for successive occurrences.\n\ -See also the functions `match-beginning', `match-end' and `replace-match'.") - (regexp, bound, noerror, count) + "sPosix search: ", + doc: /* Search forward from point for regular expression REGEXP. +Find the longest match in accord with Posix regular expression rules. +Set point to the end of the occurrence found, and return point. +An optional second argument bounds the search; it is a buffer position. +The match found must not extend after that position. +Optional third argument, if t, means if fail just return nil (no error). + If not nil and not t, move to limit of search and return nil. +Optional fourth argument is repeat count--search for successive occurrences. +See also the functions `match-beginning', `match-end', `match-string', +and `replace-match'. */) + (regexp, bound, noerror, count) Lisp_Object regexp, bound, noerror, count; { return search_command (regexp, bound, noerror, count, 1, 1, 1); } DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0, - "Replace text matched by last search with NEWTEXT.\n\ -If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\ -Otherwise maybe capitalize the whole text, or maybe just word initials,\n\ -based on the replaced text.\n\ -If the replaced text has only capital letters\n\ -and has at least one multiletter word, convert NEWTEXT to all caps.\n\ -If the replaced text has at least one word starting with a capital letter,\n\ -then capitalize each word in NEWTEXT.\n\n\ -If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\ -Otherwise treat `\\' as special:\n\ - `\\&' in NEWTEXT means substitute original matched text.\n\ - `\\N' means substitute what matched the Nth `\\(...\\)'.\n\ - If Nth parens didn't match, substitute nothing.\n\ - `\\\\' means insert one `\\'.\n\ -FIXEDCASE and LITERAL are optional arguments.\n\ -Leaves point at end of replacement text.\n\ -\n\ -The optional fourth argument STRING can be a string to modify.\n\ -In that case, this function creates and returns a new string\n\ -which is made by replacing the part of STRING that was matched.\n\ -\n\ -The optional fifth argument SUBEXP specifies a subexpression of the match.\n\ -It says to replace just that subexpression instead of the whole match.\n\ -This is useful only after a regular expression search or match\n\ -since only regular expressions have distinguished subexpressions.") - (newtext, fixedcase, literal, string, subexp) + doc: /* Replace text matched by last search with NEWTEXT. +If second arg FIXEDCASE is non-nil, do not alter case of replacement text. +Otherwise maybe capitalize the whole text, or maybe just word initials, +based on the replaced text. +If the replaced text has only capital letters +and has at least one multiletter word, convert NEWTEXT to all caps. +If the replaced text has at least one word starting with a capital letter, +then capitalize each word in NEWTEXT. + +If third arg LITERAL is non-nil, insert NEWTEXT literally. +Otherwise treat `\\' as special: + `\\&' in NEWTEXT means substitute original matched text. + `\\N' means substitute what matched the Nth `\\(...\\)'. + If Nth parens didn't match, substitute nothing. + `\\\\' means insert one `\\'. +FIXEDCASE and LITERAL are optional arguments. +Leaves point at end of replacement text. + +The optional fourth argument STRING can be a string to modify. +This is meaningful when the previous match was done against STRING, +using `string-match'. When used this way, `replace-match' +creates and returns a new string made by copying STRING and replacing +the part of STRING that was matched. + +The optional fifth argument SUBEXP specifies a subexpression; +it says to replace just that subexpression with NEWTEXT, +rather than replacing the entire matched text. +This is, in a vague sense, the inverse of using `\\N' in NEWTEXT; +`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts +NEWTEXT in place of subexp N. +This is useful only after a regular expression search or match, +since only regular expressions have distinguished subexpressions. */) + (newtext, fixedcase, literal, string, subexp) Lisp_Object newtext, fixedcase, literal, string, subexp; { enum { nochange, all_caps, cap_initial } case_action; - register int pos, last; + register int pos, pos_byte; int some_multiletter_word; int some_lowercase; int some_uppercase; int some_nonuppercase_initial; register int c, prevc; - int inslen; int sub; int opoint, newpoint; - CHECK_STRING (newtext, 0); + CHECK_STRING (newtext); if (! NILP (string)) - CHECK_STRING (string, 4); + CHECK_STRING (string); case_action = nochange; /* We tried an initialization */ /* but some C compilers blew it */ @@ -2139,7 +2217,7 @@ since only regular expressions have distinguished subexpressions.") sub = 0; else { - CHECK_NUMBER (subexp, 3); + CHECK_NUMBER (subexp); sub = XINT (subexp); if (sub < 0 || sub >= search_regs.num_regs) args_out_of_range (subexp, make_number (search_regs.num_regs)); @@ -2164,18 +2242,16 @@ since only regular expressions have distinguished subexpressions.") if (NILP (fixedcase)) { - int beg; /* Decide how to casify by examining the matched text. */ + int last; - if (NILP (string)) - last = CHAR_TO_BYTE (search_regs.end[sub]); - else - last = search_regs.end[sub]; + pos = search_regs.start[sub]; + last = search_regs.end[sub]; if (NILP (string)) - beg = CHAR_TO_BYTE (search_regs.start[sub]); + pos_byte = CHAR_TO_BYTE (pos); else - beg = search_regs.start[sub]; + pos_byte = string_char_to_byte (string, pos); prevc = '\n'; case_action = all_caps; @@ -2187,12 +2263,15 @@ since only regular expressions have distinguished subexpressions.") some_nonuppercase_initial = 0; some_uppercase = 0; - for (pos = beg; pos < last; pos++) + while (pos < last) { if (NILP (string)) - c = FETCH_BYTE (pos); + { + c = FETCH_CHAR (pos_byte); + INC_BOTH (pos, pos_byte); + } else - c = XSTRING (string)->data[pos]; + FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte); if (LOWERCASEP (c)) { @@ -2251,19 +2330,19 @@ since only regular expressions have distinguished subexpressions.") if desired. */ if (NILP (literal)) { - int lastpos = -1; - int lastpos_byte = -1; + int lastpos = 0; + int lastpos_byte = 0; /* We build up the substituted string in ACCUM. */ Lisp_Object accum; Lisp_Object middle; - int pos_byte; + int length = STRING_BYTES (XSTRING (newtext)); accum = Qnil; - for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;) + for (pos_byte = 0, pos = 0; pos_byte < length;) { int substart = -1; - int subend; + int subend = 0; int delbackslash = 0; FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); @@ -2271,6 +2350,7 @@ since only regular expressions have distinguished subexpressions.") if (c == '\\') { FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte); + if (c == '&') { substart = search_regs.start[sub]; @@ -2291,10 +2371,10 @@ since only regular expressions have distinguished subexpressions.") } if (substart >= 0) { - if (pos - 1 != lastpos + 1) - middle = substring_both (newtext, lastpos + 1, - lastpos_byte + 1, - pos - 1, pos_byte - 1); + if (pos - 2 != lastpos) + middle = substring_both (newtext, lastpos, + lastpos_byte, + pos - 2, pos_byte - 2); else middle = Qnil; accum = concat3 (accum, middle, @@ -2306,9 +2386,9 @@ since only regular expressions have distinguished subexpressions.") } else if (delbackslash) { - middle = substring_both (newtext, lastpos + 1, - lastpos_byte + 1, - pos, pos_byte); + middle = substring_both (newtext, lastpos, + lastpos_byte, + pos - 1, pos_byte - 1); accum = concat2 (accum, middle); lastpos = pos; @@ -2316,9 +2396,9 @@ since only regular expressions have distinguished subexpressions.") } } - if (pos != lastpos + 1) - middle = substring_both (newtext, lastpos + 1, - lastpos_byte + 1, + if (pos != lastpos) + middle = substring_both (newtext, lastpos, + lastpos_byte, pos, pos_byte); else middle = Qnil; @@ -2335,66 +2415,150 @@ since only regular expressions have distinguished subexpressions.") return concat3 (before, newtext, after); } - /* Record point, the move (quietly) to the start of the match. */ - if (PT > search_regs.start[sub]) + /* Record point, then move (quietly) to the start of the match. */ + if (PT >= search_regs.end[sub]) opoint = PT - ZV; + else if (PT > search_regs.start[sub]) + opoint = search_regs.end[sub] - ZV; else opoint = PT; - TEMP_SET_PT (search_regs.start[sub]); - - /* We insert the replacement text before the old text, and then - delete the original text. This means that markers at the - beginning or end of the original will float to the corresponding - position in the replacement. */ - if (!NILP (literal)) - Finsert_and_inherit (1, &newtext); - else + /* If we want non-literal replacement, + perform substitution on the replacement string. */ + if (NILP (literal)) { - struct gcpro gcpro1; - GCPRO1 (newtext); - - for (pos = 0; pos < XSTRING (newtext)->size; pos++) + int length = STRING_BYTES (XSTRING (newtext)); + unsigned char *substed; + int substed_alloc_size, substed_len; + int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters); + int str_multibyte = STRING_MULTIBYTE (newtext); + Lisp_Object rev_tbl; + int really_changed = 0; + + rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table) + ? Fchar_table_extra_slot (Vnonascii_translation_table, + make_number (0)) + : Qnil); + + substed_alloc_size = length * 2 + 100; + substed = (unsigned char *) xmalloc (substed_alloc_size + 1); + substed_len = 0; + + /* Go thru NEWTEXT, producing the actual text to insert in + SUBSTED while adjusting multibyteness to that of the current + buffer. */ + + for (pos_byte = 0, pos = 0; pos_byte < length;) { - int offset = PT - search_regs.start[sub]; + unsigned char str[MAX_MULTIBYTE_LENGTH]; + unsigned char *add_stuff = NULL; + int add_len = 0; + int idx = -1; + + if (str_multibyte) + { + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte); + if (!buf_multibyte) + c = multibyte_char_to_unibyte (c, rev_tbl); + } + else + { + /* Note that we don't have to increment POS. */ + c = XSTRING (newtext)->data[pos_byte++]; + if (buf_multibyte) + c = unibyte_char_to_multibyte (c); + } + + /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED, + or set IDX to a match index, which means put that part + of the buffer text into SUBSTED. */ - c = XSTRING (newtext)->data[pos]; if (c == '\\') { - c = XSTRING (newtext)->data[++pos]; + really_changed = 1; + + if (str_multibyte) + { + FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, + pos, pos_byte); + if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c)) + c = multibyte_char_to_unibyte (c, rev_tbl); + } + else + { + c = XSTRING (newtext)->data[pos_byte++]; + if (buf_multibyte) + c = unibyte_char_to_multibyte (c); + } + if (c == '&') - Finsert_buffer_substring - (Fcurrent_buffer (), - make_number (search_regs.start[sub] + offset), - make_number (search_regs.end[sub] + offset)); + idx = sub; else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0') { if (search_regs.start[c - '0'] >= 1) - Finsert_buffer_substring - (Fcurrent_buffer (), - make_number (search_regs.start[c - '0'] + offset), - make_number (search_regs.end[c - '0'] + offset)); + idx = c - '0'; } else if (c == '\\') - insert_char (c); + add_len = 1, add_stuff = "\\"; else - error ("Invalid use of `\\' in replacement text"); + { + xfree (substed); + error ("Invalid use of `\\' in replacement text"); + } } else - insert_char (c); + { + add_len = CHAR_STRING (c, str); + add_stuff = str; + } + + /* If we want to copy part of a previous match, + set up ADD_STUFF and ADD_LEN to point to it. */ + if (idx >= 0) + { + int begbyte = CHAR_TO_BYTE (search_regs.start[idx]); + add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte; + if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx]) + move_gap (search_regs.start[idx]); + add_stuff = BYTE_POS_ADDR (begbyte); + } + + /* Now the stuff we want to add to SUBSTED + is invariably ADD_LEN bytes starting at ADD_STUFF. */ + + /* Make sure SUBSTED is big enough. */ + if (substed_len + add_len >= substed_alloc_size) + { + substed_alloc_size = substed_len + add_len + 500; + substed = (unsigned char *) xrealloc (substed, + substed_alloc_size + 1); + } + + /* Now add to the end of SUBSTED. */ + if (add_stuff) + { + bcopy (add_stuff, substed + substed_len, add_len); + substed_len += add_len; + } } - UNGCPRO; + + if (really_changed) + newtext = make_string (substed, substed_len); + + xfree (substed); } - inslen = PT - (search_regs.start[sub]); - del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen); + /* Replace the old text with the new in the cleanest possible way. */ + replace_range (search_regs.start[sub], search_regs.end[sub], + newtext, 1, 0, 1); + newpoint = search_regs.start[sub] + XSTRING (newtext)->size; if (case_action == all_caps) - Fupcase_region (make_number (PT - inslen), make_number (PT)); + Fupcase_region (make_number (search_regs.start[sub]), + make_number (newpoint)); else if (case_action == cap_initial) - Fupcase_initials_region (make_number (PT - inslen), make_number (PT)); - - newpoint = PT; + Fupcase_initials_region (make_number (search_regs.start[sub]), + make_number (newpoint)); /* Put point back where it was in the text. */ if (opoint <= 0) @@ -2415,7 +2579,7 @@ match_limit (num, beginningp) { register int n; - CHECK_NUMBER (num, 0); + CHECK_NUMBER (num); n = XINT (num); if (n < 0 || n >= search_regs.num_regs) args_out_of_range (num, make_number (search_regs.num_regs)); @@ -2427,43 +2591,43 @@ match_limit (num, beginningp) } DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0, - "Return position of start of text matched by last search.\n\ -SUBEXP, a number, specifies which parenthesized expression in the last\n\ - regexp.\n\ -Value is nil if SUBEXPth pair didn't match, or there were less than\n\ - SUBEXP pairs.\n\ -Zero means the entire text matched by the whole regexp or whole string.") - (subexp) + doc: /* Return position of start of text matched by last search. +SUBEXP, a number, specifies which parenthesized expression in the last + regexp. +Value is nil if SUBEXPth pair didn't match, or there were less than + SUBEXP pairs. +Zero means the entire text matched by the whole regexp or whole string. */) + (subexp) Lisp_Object subexp; { return match_limit (subexp, 1); } DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0, - "Return position of end of text matched by last search.\n\ -SUBEXP, a number, specifies which parenthesized expression in the last\n\ - regexp.\n\ -Value is nil if SUBEXPth pair didn't match, or there were less than\n\ - SUBEXP pairs.\n\ -Zero means the entire text matched by the whole regexp or whole string.") - (subexp) + doc: /* Return position of end of text matched by last search. +SUBEXP, a number, specifies which parenthesized expression in the last + regexp. +Value is nil if SUBEXPth pair didn't match, or there were less than + SUBEXP pairs. +Zero means the entire text matched by the whole regexp or whole string. */) + (subexp) Lisp_Object subexp; { return match_limit (subexp, 0); } DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0, - "Return a list containing all info on what the last search matched.\n\ -Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\ -All the elements are markers or nil (nil if the Nth pair didn't match)\n\ -if the last match was on a buffer; integers or nil if a string was matched.\n\ -Use `store-match-data' to reinstate the data in this list.\n\ -\n\ -If INTEGERS (the optional first argument) is non-nil, always use integers\n\ -\(rather than markers) to represent buffer positions.\n\ -If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\ -to hold all the values, and if INTEGERS is non-nil, no consing is done.") - (integers, reuse) + doc: /* Return a list containing all info on what the last search matched. +Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'. +All the elements are markers or nil (nil if the Nth pair didn't match) +if the last match was on a buffer; integers or nil if a string was matched. +Use `store-match-data' to reinstate the data in this list. + +If INTEGERS (the optional first argument) is non-nil, always use integers +\(rather than markers) to represent buffer positions. +If REUSE is a list, reuse it as part of the value. If REUSE is long enough +to hold all the values, and if INTEGERS is non-nil, no consing is done. */) + (integers, reuse) Lisp_Object integers, reuse; { Lisp_Object tail, prev; @@ -2473,6 +2637,8 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.") if (NILP (last_thing_searched)) return Qnil; + prev = Qnil; + data = (Lisp_Object *) alloca ((2 * search_regs.num_regs) * sizeof (Lisp_Object)); @@ -2516,28 +2682,28 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.") /* If REUSE is a list, store as many value elements as will fit into the elements of REUSE. */ for (i = 0, tail = reuse; CONSP (tail); - i++, tail = XCONS (tail)->cdr) + i++, tail = XCDR (tail)) { if (i < 2 * len + 2) - XCONS (tail)->car = data[i]; + XSETCAR (tail, data[i]); else - XCONS (tail)->car = Qnil; + XSETCAR (tail, Qnil); prev = tail; } /* If we couldn't fit all value elements into REUSE, cons up the rest of them and add them to the end of REUSE. */ if (i < 2 * len + 2) - XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i); + XSETCDR (prev, Flist (2 * len + 2 - i, data + i)); return reuse; } -DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0, - "Set internal data on last search match from elements of LIST.\n\ -LIST should have been created by calling `match-data' previously.") - (list) +DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0, + doc: /* Set internal data on last search match from elements of LIST. +LIST should have been created by calling `match-data' previously. */) + (list) register Lisp_Object list; { register int i; @@ -2576,6 +2742,9 @@ LIST should have been created by calling `match-data' previously.") length * sizeof (regoff_t)); } + for (i = search_regs.num_regs; i < length; i++) + search_regs.start[i] = -1; + search_regs.num_regs = length; } } @@ -2590,6 +2759,8 @@ LIST should have been created by calling `match-data' previously.") } else { + int from; + if (MARKERP (marker)) { if (XMARKER (marker)->buffer == 0) @@ -2598,15 +2769,16 @@ LIST should have been created by calling `match-data' previously.") XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer); } - CHECK_NUMBER_COERCE_MARKER (marker, 0); - search_regs.start[i] = XINT (marker); + CHECK_NUMBER_COERCE_MARKER (marker); + from = XINT (marker); list = Fcdr (list); marker = Fcar (list); if (MARKERP (marker) && XMARKER (marker)->buffer == 0) XSETFASTINT (marker, 0); - CHECK_NUMBER_COERCE_MARKER (marker, 0); + CHECK_NUMBER_COERCE_MARKER (marker); + search_regs.start[i] = from; search_regs.end[i] = XINT (marker); } list = Fcdr (list); @@ -2660,22 +2832,22 @@ restore_match_data () /* Quote a string to inactivate reg-expr chars */ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, - "Return a regexp string which matches exactly STRING and nothing else.") - (string) + doc: /* Return a regexp string which matches exactly STRING and nothing else. */) + (string) Lisp_Object string; { register unsigned char *in, *out, *end; register unsigned char *temp; int backslashes_added = 0; - CHECK_STRING (string, 0); + CHECK_STRING (string); - temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2); + temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2); /* Now copy the data into the new string, inserting escapes. */ in = XSTRING (string)->data; - end = in + XSTRING (string)->size_byte; + end = in + STRING_BYTES (XSTRING (string)); out = temp; for (; in != end; in++) @@ -2688,11 +2860,13 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, *out++ = *in; } - return make_multibyte_string (temp, + return make_specified_string (temp, XSTRING (string)->size + backslashes_added, - out - temp); + out - temp, + STRING_MULTIBYTE (string)); } +void syms_of_search () { register int i; @@ -2742,6 +2916,6 @@ syms_of_search () defsubr (&Smatch_beginning); defsubr (&Smatch_end); defsubr (&Smatch_data); - defsubr (&Sstore_match_data); + defsubr (&Sset_match_data); defsubr (&Sregexp_quote); }