From a28d4396018e48479916fd624e2371aa1f75e6e7 Mon Sep 17 00:00:00 2001 From: Kenichi Handa Date: Wed, 18 May 2011 23:11:50 +0900 Subject: [PATCH] Make find_automatic_composition more efficient. --- src/ChangeLog | 6 ++ src/composite.c | 279 ++++++++++++++++++++++++++---------------------- 2 files changed, 156 insertions(+), 129 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index ceb45afebc..fd6f42acb9 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2011-05-18 Kenichi Handa + + * composite.c (CHAR_COMPOSABLE_P): Add more check for efficiency. + (BACKWARD_CHAR): Wrap the arg STOP by parenthesis. + (find_automatic_composition): Mostly rewrite for efficiency. + 2011-05-12 Drew Adams * textprop.c (Fprevious_single_char_property_change): Doc fix (bug#8655). diff --git a/src/composite.c b/src/composite.c index f069acce1c..8f8878e46a 100644 --- a/src/composite.c +++ b/src/composite.c @@ -972,11 +972,12 @@ static int _work_char; category Z? or C? are not composable except for ZWNJ and ZWJ. */ #define CHAR_COMPOSABLE_P(C) \ - ((C) == 0x200C || (C) == 0x200D \ - || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ - (SYMBOLP (_work_val) \ - && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \ - && _work_char != 'Z'))) + ((C) > ' ' \ + && ((C) == 0x200C || (C) == 0x200D \ + || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \ + (SYMBOLP (_work_val) \ + && (_work_char = SDATA (SYMBOL_NAME (_work_val))[0]) != 'C' \ + && _work_char != 'Z')))) /* Update cmp_it->stop_pos to the next position after CHARPOS (and BYTEPOS) where character composition may happen. If BYTEPOS is @@ -1471,7 +1472,7 @@ struct position_record /* Update the members of POSITION to the previous character boundary. */ #define BACKWARD_CHAR(POSITION, STOP) \ do { \ - if ((POSITION).pos == STOP) \ + if ((POSITION).pos == (STOP)) \ (POSITION).p = GPT_ADDR; \ do { \ (POSITION).pos_byte--; \ @@ -1481,178 +1482,198 @@ struct position_record } while (0) /* This is like find_composition, but find an automatic composition - instead. If found, set *GSTRING to the glyph-string representing - the composition, and return 1. Otherwise, return 0. */ + instead. It is assured that POS is not within a static + composition. If found, set *GSTRING to the glyph-string + representing the composition, and return 1. Otherwise, *GSTRING to + Qnil, and return 0. */ static int -find_automatic_composition (EMACS_INT pos, EMACS_INT limit, EMACS_INT *start, EMACS_INT *end, Lisp_Object *gstring, Lisp_Object string) +find_automatic_composition (EMACS_INT pos, EMACS_INT limit, + EMACS_INT *start, EMACS_INT *end, + Lisp_Object *gstring, Lisp_Object string) { EMACS_INT head, tail, stop; - /* Limit to check a composition after POS. */ + /* Forward limit position of checking a composition taking a + looking-back count into account. */ EMACS_INT fore_check_limit; - struct position_record orig, cur; - - /* FIXME: It's not obvious whether these two variables need initialization. - If they do, please supply initial values. - If not, please remove this comment. */ - struct position_record check IF_LINT (= {0}), prev IF_LINT (= {0}); - - Lisp_Object check_val, val, elt; + struct position_record cur, prev; int c; Lisp_Object window; struct window *w; + int need_adjustment = 0; window = Fget_buffer_window (Fcurrent_buffer (), Qnil); if (NILP (window)) return 0; w = XWINDOW (window); - orig.pos = pos; + cur.pos = pos; if (NILP (string)) { head = BEGV, tail = ZV, stop = GPT; - orig.pos_byte = CHAR_TO_BYTE (orig.pos); - orig.p = BYTE_POS_ADDR (orig.pos_byte); + cur.pos_byte = CHAR_TO_BYTE (cur.pos); + cur.p = BYTE_POS_ADDR (cur.pos_byte); } else { head = 0, tail = SCHARS (string), stop = -1; - orig.pos_byte = string_char_to_byte (string, orig.pos); - orig.p = SDATA (string) + orig.pos_byte; + cur.pos_byte = string_char_to_byte (string, cur.pos); + cur.p = SDATA (string) + cur.pos_byte; } - if (limit < pos) - fore_check_limit = min (tail, pos + MAX_AUTO_COMPOSITION_LOOKBACK); + if (limit < 0) + /* Finding a composition covering the character after POS is the + same as setting LIMIT to POS. */ + limit = pos; + if (limit <= pos) + fore_check_limit = min (tail, pos + 1 + MAX_AUTO_COMPOSITION_LOOKBACK); else fore_check_limit = min (tail, limit + MAX_AUTO_COMPOSITION_LOOKBACK); - cur = orig; - retry: - check_val = Qnil; - /* At first, check if POS is composable. */ - c = STRING_CHAR (cur.p); - if (! CHAR_COMPOSABLE_P (c)) - { - if (limit < 0) - return 0; - if (limit >= cur.pos) - goto search_forward; - } - else - { - val = CHAR_TABLE_REF (Vcomposition_function_table, c); - if (! NILP (val)) - check_val = val, check = cur; - else - while (cur.pos + 1 < fore_check_limit) - { - EMACS_INT b, e; + /* Provided that we have these possible compositions now: - FORWARD_CHAR (cur, stop); - if (get_property_and_range (cur.pos, Qcomposition, &val, &b, &e, - Qnil) - && COMPOSITION_VALID_P (b, e, val)) - { - fore_check_limit = cur.pos; - break; - } - c = STRING_CHAR (cur.p); - if (! CHAR_COMPOSABLE_P (c)) - break; - val = CHAR_TABLE_REF (Vcomposition_function_table, c); - if (NILP (val)) - continue; - check_val = val, check = cur; - break; - } - cur = orig; - } - /* Rewind back to the position where we can safely search forward - for compositions. */ - while (cur.pos > head) - { - EMACS_INT b, e; + POS: 1 2 3 4 5 6 7 8 9 + |-A-| + |-B-|-C-|--D--| - BACKWARD_CHAR (cur, stop); - if (get_property_and_range (cur.pos, Qcomposition, &val, &b, &e, Qnil) - && COMPOSITION_VALID_P (b, e, val)) - break; + Here, it is known that characters after positions 1 and 9 can + never be composed (i.e. ! CHAR_COMPOSABLE_P (CH)), and + composition A is an invalid one because it's partially covered by + the valid composition C. And to know whether a composition is + valid or not, the only way is to start searching forward from a + position that can not be a tail part of composition (it's 2 in + the above case). + + Now we have these cases (1 through 4): + + -- character after POS is ... -- + not composable composable + LIMIT <= POS (1) (3) + POS < LIMIT (2) (4) + + Among them, in case (2), we simply search forward from POS. + + In the other cases, we at first rewind back to the position where + the previous character is not composable or the beginning of + buffer (string), then search compositions forward. In case (1) + and (3) we repeat this process until a composition is found. */ + + while (1) + { c = STRING_CHAR (cur.p); if (! CHAR_COMPOSABLE_P (c)) - break; - val = CHAR_TABLE_REF (Vcomposition_function_table, c); - if (! NILP (val)) - check_val = val, check = cur; - } - prev = cur; - /* Now search forward. */ - search_forward: - *gstring = Qnil; - if (! NILP (check_val) || limit >= orig.pos) - { - if (NILP (check_val)) - cur = orig; - else - cur = check; - while (cur.pos < fore_check_limit) { - int need_adjustment = 0; + if (limit <= pos) /* case (1) */ + { + do { + if (cur.pos == limit) + return 0; + BACKWARD_CHAR (cur, stop); + c = STRING_CHAR (cur.p); + } while (! CHAR_COMPOSABLE_P (c)); + fore_check_limit = cur.pos + 1; + } + else /* case (2) */ + /* No need of rewinding back. */ + goto search_forward; + } - if (NILP (check_val)) + /* Rewind back to the position where we can safely search + forward for compositions. It is assured that the character + at cur.pos is composable. */ + while (head < cur.pos) + { + prev = cur; + BACKWARD_CHAR (cur, stop); + c = STRING_CHAR (cur.p); + if (! CHAR_COMPOSABLE_P (c)) { - c = STRING_CHAR (cur.p); - check_val = CHAR_TABLE_REF (Vcomposition_function_table, c); + cur = prev; + break; } - for (; CONSP (check_val); check_val = XCDR (check_val)) + } + + search_forward: + /* Now search forward. */ + *gstring = Qnil; + prev = cur; /* remember the start of searching position. */ + while (cur.pos < fore_check_limit) + { + Lisp_Object val; + + c = STRING_CHAR (cur.p); + for (val = CHAR_TABLE_REF (Vcomposition_function_table, c); + CONSP (val); val = XCDR (val)) { - elt = XCAR (check_val); - if (VECTORP (elt) && ASIZE (elt) == 3 && NATNUMP (AREF (elt, 1)) - && cur.pos - XFASTINT (AREF (elt, 1)) >= head) + Lisp_Object elt = XCAR (val); + + if (VECTORP (elt) && ASIZE (elt) == 3 && NATNUMP (AREF (elt, 1))) { - check.pos = cur.pos - XFASTINT (AREF (elt, 1)); - if (check.pos == cur.pos) - check.pos_byte = cur.pos_byte; - else - check.pos_byte = CHAR_TO_BYTE (check.pos); - val = autocmp_chars (elt, check.pos, check.pos_byte, - tail, w, NULL, string); + EMACS_INT check_pos = cur.pos - XFASTINT (AREF (elt, 1)); + struct position_record check; + + if (check_pos < head + || (limit <= pos ? pos < check_pos + : limit <= check_pos)) + continue; + for (check = cur; check_pos < check.pos; ) + BACKWARD_CHAR (check, stop); + *gstring = autocmp_chars (elt, check.pos, check.pos_byte, + tail, w, NULL, string); need_adjustment = 1; - if (! NILP (val)) + if (NILP (*gstring)) { - *gstring = val; + /* As we have called Lisp, there's a possibility + that buffer/string is relocated. */ + if (NILP (string)) + cur.p = BYTE_POS_ADDR (cur.pos_byte); + else + cur.p = SDATA (string) + cur.pos_byte; + } + else + { + /* We found a candidate of a target composition. */ *start = check.pos; *end = check.pos + LGSTRING_CHAR_LEN (*gstring); - if (*start <= orig.pos ? *end > orig.pos - : limit >= orig.pos) + if (pos < limit + ? pos < *end + : *start <= pos && pos < *end) + /* This is the target composition. */ return 1; cur.pos = *end; - cur.pos_byte = CHAR_TO_BYTE (cur.pos); + if (NILP (string)) + { + cur.pos_byte = CHAR_TO_BYTE (cur.pos); + cur.p = BYTE_POS_ADDR (cur.pos_byte); + } + else + { + cur.pos_byte = string_char_to_byte (string, cur.pos); + cur.p = SDATA (string) + cur.pos_byte; + } break; } } } - if (need_adjustment) - { - /* As we have called Lisp, there's a possibility that - buffer/string is relocated. */ - if (NILP (string)) - cur.p = BYTE_POS_ADDR (cur.pos_byte); - else - cur.p = SDATA (string) + cur.pos_byte; - } - if (! CONSP (check_val)) + if (! CONSP (val)) + /* We found no composition here. */ FORWARD_CHAR (cur, stop); - check_val = Qnil; } - } - if (! NILP (*gstring)) - return (limit >= 0 || (*start <= orig.pos && *end > orig.pos)); - if (limit >= 0 && limit < orig.pos && prev.pos > head) - { + + if (pos < limit) /* case (2) and (4)*/ + return 0; + if (! NILP (*gstring)) + return 1; + if (prev.pos == head) + return 0; cur = prev; + if (need_adjustment) + { + if (NILP (string)) + cur.p = BYTE_POS_ADDR (cur.pos_byte); + else + cur.p = SDATA (string) + cur.pos_byte; + } BACKWARD_CHAR (cur, stop); - orig = cur; - fore_check_limit = orig.pos; - goto retry; } return 0; } -- 2.20.1