- /* The general approach is that we are going to maintain that we know */
- /* the first (closest to the present position, in whatever direction */
- /* we're searching) character that could possibly be the last */
- /* (furthest from present position) character of a valid match. We */
- /* advance the state of our knowledge by looking at that character */
- /* and seeing whether it indeed matches the last character of the */
- /* pattern. If it does, we take a closer look. If it does not, we */
- /* move our pointer (to putative last characters) as far as is */
- /* logically possible. This amount of movement, which I call a */
- /* stride, will be the length of the pattern if the actual character */
- /* appears nowhere in the pattern, otherwise it will be the distance */
- /* from the last occurrence of that character to the end of the */
- /* pattern. */
- /* As a coding trick, an enormous stride is coded into the table for */
- /* characters that match the last character. This allows use of only */
- /* a single test, a test for having gone past the end of the */
- /* permissible match region, to test for both possible matches (when */
- /* the stride goes past the end immediately) and failure to */
- /* match (where you get nudged past the end one stride at a time). */
-
- /* Here we make a "mickey mouse" BM table. The stride of the search */
- /* is determined only by the last character of the putative match. */
- /* If that character does not match, we will stride the proper */
- /* distance to propose a match that superimposes it on the last */
- /* instance of a character that matches it (per trt), or misses */
- /* it entirely if there is none. */
-
- dirlen = len_byte * direction;
- infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
- if (direction < 0)
- pat = (base_pat += len_byte - 1);
- BM_tab_base = BM_tab;
- BM_tab += 0400;
- j = dirlen; /* to get it in a register */
- /* A character that does not appear in the pattern induces a */
- /* stride equal to the pattern length. */
- while (BM_tab_base != BM_tab)
+
+ stop:
+ if (n == 0)
+ {
+ if (forward)
+ set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
+ else
+ set_search_regs (multibyte ? pos_byte : pos, len_byte);
+
+ return pos;
+ }
+ else if (n > 0)
+ return -n;
+ else
+ return n;
+}
+\f
+/* Do Boyer-Moore search N times for the string BASE_PAT,
+ whose length is LEN/LEN_BYTE,
+ from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+ DIRECTION says which direction we search in.
+ TRT and INVERSE_TRT are translation tables.
+ Characters in PAT are already translated by TRT.
+
+ This kind of search works if all the characters in BASE_PAT that
+ have nontrivial translation are the same aside from the last byte.
+ This makes it possible to translate just the last byte of a
+ character, and do so after just a simple test of the context.
+ CHARSET_BASE is nonzero iff there is such a non-ASCII character.
+
+ If that criterion is not satisfied, do not call this function. */
+
+static int
+boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
+ pos, pos_byte, lim, lim_byte, charset_base)
+ int n;
+ unsigned char *base_pat;
+ int len, len_byte;
+ Lisp_Object trt;
+ Lisp_Object inverse_trt;
+ int pos, pos_byte;
+ int lim, lim_byte;
+ int charset_base;
+{
+ int direction = ((n > 0) ? 1 : -1);
+ register int dirlen;
+ int infinity, limit, stride_for_teases = 0;
+ register int *BM_tab;
+ int *BM_tab_base;
+ register unsigned char *cursor, *p_limit;
+ register int i, j;
+ unsigned char *pat, *pat_end;
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+
+ unsigned char simple_translate[0400];
+ /* These are set to the preceding bytes of a byte to be translated
+ if charset_base is nonzero. As the maximum byte length of a
+ multibyte character is 4, we have to check at most three previous
+ bytes. */
+ int translate_prev_byte1 = 0;
+ int translate_prev_byte2 = 0;
+ int translate_prev_byte3 = 0;
+
+#ifdef C_ALLOCA
+ int BM_tab_space[0400];
+ BM_tab = &BM_tab_space[0];
+#else
+ BM_tab = (int *) alloca (0400 * sizeof (int));
+#endif
+ /* The general approach is that we are going to maintain that we know */
+ /* the first (closest to the present position, in whatever direction */
+ /* we're searching) character that could possibly be the last */
+ /* (furthest from present position) character of a valid match. We */
+ /* advance the state of our knowledge by looking at that character */
+ /* and seeing whether it indeed matches the last character of the */
+ /* pattern. If it does, we take a closer look. If it does not, we */
+ /* move our pointer (to putative last characters) as far as is */
+ /* logically possible. This amount of movement, which I call a */
+ /* stride, will be the length of the pattern if the actual character */
+ /* appears nowhere in the pattern, otherwise it will be the distance */
+ /* from the last occurrence of that character to the end of the */
+ /* pattern. */
+ /* As a coding trick, an enormous stride is coded into the table for */
+ /* characters that match the last character. This allows use of only */
+ /* a single test, a test for having gone past the end of the */
+ /* permissible match region, to test for both possible matches (when */
+ /* the stride goes past the end immediately) and failure to */
+ /* match (where you get nudged past the end one stride at a time). */
+
+ /* Here we make a "mickey mouse" BM table. The stride of the search */
+ /* is determined only by the last character of the putative match. */
+ /* If that character does not match, we will stride the proper */
+ /* distance to propose a match that superimposes it on the last */
+ /* instance of a character that matches it (per trt), or misses */
+ /* it entirely if there is none. */
+
+ dirlen = len_byte * direction;
+ infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
+
+ /* Record position after the end of the pattern. */
+ pat_end = base_pat + len_byte;
+ /* BASE_PAT points to a character that we start scanning from.
+ It is the first character in a forward search,
+ the last character in a backward search. */
+ if (direction < 0)
+ base_pat = pat_end - 1;
+
+ BM_tab_base = BM_tab;
+ BM_tab += 0400;
+ j = dirlen; /* to get it in a register */
+ /* A character that does not appear in the pattern induces a */
+ /* stride equal to the pattern length. */
+ while (BM_tab_base != BM_tab)
+ {
+ *--BM_tab = j;
+ *--BM_tab = j;
+ *--BM_tab = j;
+ *--BM_tab = j;
+ }
+
+ /* We use this for translation, instead of TRT itself.
+ We fill this in to handle the characters that actually
+ occur in the pattern. Others don't matter anyway! */
+ bzero (simple_translate, sizeof simple_translate);
+ for (i = 0; i < 0400; i++)
+ simple_translate[i] = i;
+
+ if (charset_base)
+ {
+ /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
+ byte following them are the target of translation. */
+ int sample_char = charset_base | 0x20;
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ int len = CHAR_STRING (sample_char, str);
+
+ translate_prev_byte1 = str[len - 2];
+ if (len > 2)