X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/7e570fbf3ef8ccd31df2651f5d2775c5697d5950..91415fb1cd0f913be6e8a2409b0d9cdca4352f3f:/src/bidi.c diff --git a/src/bidi.c b/src/bidi.c index 73fec3533a..18dce1931d 100644 --- a/src/bidi.c +++ b/src/bidi.c @@ -1,6 +1,6 @@ /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs. - Copyright (C) 2000-2001, 2004-2005, 2009-2012 - Free Software Foundation, Inc. + Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software + Foundation, Inc. This file is part of GNU Emacs. @@ -56,27 +56,26 @@ along with GNU Emacs. If not, see . */ #include #include -#include #include "lisp.h" #include "character.h" #include "buffer.h" #include "dispextern.h" +#include "region-cache.h" static bool bidi_initialized = 0; static Lisp_Object bidi_type_table, bidi_mirror_table; -#define LRM_CHAR 0x200E -#define RLM_CHAR 0x200F -#define BIDI_EOB -1 +#define BIDI_EOB (-1) /* Data type for describing the bidirectional character categories. */ typedef enum { UNKNOWN_BC, NEUTRAL, WEAK, - STRONG + STRONG, + EXPLICIT_FORMATTING } bidi_category_t; /* UAX#9 says to search only for L, AL, or R types of characters, and @@ -97,7 +96,7 @@ static Lisp_Object Qparagraph_start, Qparagraph_separate; /* Return the bidi type of a character CH, subject to the current directional OVERRIDE. */ -static inline bidi_type_t +static bidi_type_t bidi_get_type (int ch, bidi_dir_t override) { bidi_type_t default_type; @@ -115,13 +114,9 @@ bidi_get_type (int ch, bidi_dir_t override) if (default_type == UNKNOWN_BT) emacs_abort (); - if (override == NEUTRAL_DIR) - return default_type; - switch (default_type) { - /* Although UAX#9 does not tell, it doesn't make sense to - override NEUTRAL_B and LRM/RLM characters. */ + case WEAK_BN: case NEUTRAL_B: case LRE: case LRO: @@ -129,31 +124,31 @@ bidi_get_type (int ch, bidi_dir_t override) case RLO: case PDF: return default_type; + /* FIXME: The isolate controls are treated as BN until we add + support for UBA v6.3. */ + case LRI: + case RLI: + case FSI: + case PDI: + return WEAK_BN; default: - switch (ch) - { - case LRM_CHAR: - case RLM_CHAR: - return default_type; - default: - if (override == L2R) /* X6 */ - return STRONG_L; - else if (override == R2L) - return STRONG_R; - else - emacs_abort (); /* can't happen: handled above */ - } + if (override == L2R) + return STRONG_L; + else if (override == R2L) + return STRONG_R; + else + return default_type; } } -static inline void +static void bidi_check_type (bidi_type_t type) { eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON); } /* Given a bidi TYPE of a character, return its category. */ -static inline bidi_category_t +static bidi_category_t bidi_get_category (bidi_type_t type) { switch (type) @@ -163,12 +158,7 @@ bidi_get_category (bidi_type_t type) case STRONG_L: case STRONG_R: case STRONG_AL: - case LRE: - case LRO: - case RLE: - case RLO: return STRONG; - case PDF: /* ??? really?? */ case WEAK_EN: case WEAK_ES: case WEAK_ET: @@ -176,12 +166,30 @@ bidi_get_category (bidi_type_t type) case WEAK_CS: case WEAK_NSM: case WEAK_BN: + /* FIXME */ + case LRI: + case RLI: + case FSI: + case PDI: return WEAK; case NEUTRAL_B: case NEUTRAL_S: case NEUTRAL_WS: case NEUTRAL_ON: return NEUTRAL; + case LRE: + case LRO: + case RLE: + case RLO: + case PDF: +#if 0 + /* FIXME: This awaits implementation of isolate support. */ + case LRI: + case RLI: + case FSI: + case PDI: +#endif + return EXPLICIT_FORMATTING; default: emacs_abort (); } @@ -227,7 +235,7 @@ bidi_mirror_char (int c) embedding levels on either side of the run boundary. Also, update the saved info about previously seen characters, since that info is generally valid for a single level run. */ -static inline void +static void bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) { int higher_level = (level_before > level_after ? level_before : level_after); @@ -258,7 +266,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) /* Push the current embedding level and override status; reset the current level to LEVEL and the current override status to OVERRIDE. */ -static inline void +static void bidi_push_embedding_level (struct bidi_it *bidi_it, int level, bidi_dir_t override) { @@ -270,7 +278,7 @@ bidi_push_embedding_level (struct bidi_it *bidi_it, /* Pop the embedding level and directional override status from the stack, and return the new level. */ -static inline int +static int bidi_pop_embedding_level (struct bidi_it *bidi_it) { /* UAX#9 says to ignore invalid PDFs. */ @@ -280,7 +288,7 @@ bidi_pop_embedding_level (struct bidi_it *bidi_it) } /* Record in SAVED_INFO the information about the current character. */ -static inline void +static void bidi_remember_char (struct bidi_saved_info *saved_info, struct bidi_it *bidi_it) { @@ -296,18 +304,14 @@ bidi_remember_char (struct bidi_saved_info *saved_info, /* Copy the bidi iterator from FROM to TO. To save cycles, this only copies the part of the level stack that is actually in use. */ -static inline void +static void bidi_copy_it (struct bidi_it *to, struct bidi_it *from) { - int i; - - /* Copy everything except the level stack and beyond. */ - memcpy (to, from, offsetof (struct bidi_it, level_stack[0])); - - /* Copy the active part of the level stack. */ - to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */ - for (i = 1; i <= from->stack_idx; i++) - to->level_stack[i] = from->level_stack[i]; + /* Copy everything from the start through the active part of + the level stack. */ + memcpy (to, from, + (offsetof (struct bidi_it, level_stack[1]) + + from->stack_idx * sizeof from->level_stack[0])); } @@ -345,7 +349,7 @@ enum intact. This is called when the cached information is no more useful for the current iteration, e.g. when we were reseated to a new position on the same object. */ -static inline void +static void bidi_cache_reset (void) { bidi_cache_idx = bidi_cache_start; @@ -356,7 +360,7 @@ bidi_cache_reset (void) iterator for reordering a buffer or a string that does not come from display properties, because that means all the previously cached info is of no further use. */ -static inline void +static void bidi_cache_shrink (void) { if (bidi_cache_size > BIDI_CACHE_CHUNK) @@ -367,7 +371,7 @@ bidi_cache_shrink (void) bidi_cache_reset (); } -static inline void +static void bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it) { int current_scan_dir = bidi_it->scan_dir; @@ -384,7 +388,7 @@ bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it) level less or equal to LEVEL. if LEVEL is -1, disregard the resolved levels in cached states. DIR, if non-zero, means search in that direction from the last cache hit. */ -static inline ptrdiff_t +static ptrdiff_t bidi_cache_search (ptrdiff_t charpos, int level, int dir) { ptrdiff_t i, i_start; @@ -489,7 +493,7 @@ bidi_cache_find_level_change (int level, int dir, bool before) return -1; } -static inline void +static void bidi_cache_ensure_space (ptrdiff_t idx) { /* Enlarge the cache as needed. */ @@ -511,7 +515,7 @@ bidi_cache_ensure_space (ptrdiff_t idx) } } -static inline void +static void bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved) { ptrdiff_t idx; @@ -568,7 +572,7 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved) bidi_cache_idx = idx + 1; } -static inline bidi_type_t +static bidi_type_t bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it) { ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir); @@ -588,7 +592,7 @@ bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it) return UNKNOWN_BT; } -static inline int +static int bidi_peek_at_next_level (struct bidi_it *bidi_it) { if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1) @@ -612,7 +616,7 @@ bidi_push_it (struct bidi_it *bidi_it) /* Save the current iterator state in its entirety after the last used cache slot. */ bidi_cache_ensure_space (bidi_cache_idx); - memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it)); + bidi_cache[bidi_cache_idx++] = *bidi_it; /* Push the current cache start onto the stack. */ eassert (bidi_cache_sp < IT_STACK_SIZE); @@ -636,7 +640,7 @@ bidi_pop_it (struct bidi_it *bidi_it) bidi_cache_idx = bidi_cache_start - 1; /* Restore the bidi iterator state saved in the cache. */ - memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it)); + *bidi_it = bidi_cache[bidi_cache_idx]; /* Pop the previous cache start from the stack. */ if (bidi_cache_sp <= 0) @@ -791,7 +795,7 @@ bidi_initialize (void) /* Do whatever UAX#9 clause X8 says should be done at paragraph's end. */ -static inline void +static void bidi_set_paragraph_end (struct bidi_it *bidi_it) { bidi_it->invalid_levels = 0; @@ -873,9 +877,9 @@ bidi_line_init (struct bidi_it *bidi_it) /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END are zero-based character positions in S, BEGBYTE is byte position corresponding to BEG. UNIBYTE means S is a unibyte string. */ -static inline ptrdiff_t -bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg, - const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte) +static ptrdiff_t +bidi_count_bytes (const unsigned char *s, ptrdiff_t beg, + ptrdiff_t begbyte, ptrdiff_t end, bool unibyte) { ptrdiff_t pos = beg; const unsigned char *p = s + begbyte, *start = p; @@ -897,25 +901,25 @@ bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg, return p - start; } -/* Fetch and returns the character at byte position BYTEPOS. If S is +/* Fetch and return the character at byte position BYTEPOS. If S is non-NULL, fetch the character from string S; otherwise fetch the character from the current buffer. UNIBYTE means S is a unibyte string. */ -static inline int +static int bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte) { if (s) { + s += bytepos; if (unibyte) - return s[bytepos]; - else - return STRING_CHAR (s + bytepos); + return *s; } else - return FETCH_MULTIBYTE_CHAR (bytepos); + s = BYTE_POS_ADDR (bytepos); + return STRING_CHAR (s); } -/* Fetch and return the character at BYTEPOS/CHARPOS. If that +/* Fetch and return the character at CHARPOS/BYTEPOS. If that character is covered by a display string, treat the entire run of covered characters as a single character, either u+2029 or u+FFFC, and return their combined length in CH_LEN and NCHARS. DISP_POS @@ -929,9 +933,10 @@ bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte) u+2029 to handle it as a paragraph separator. STRING->s is the C string to iterate, or NULL if iterating over a buffer or a Lisp string; in the latter case, STRING->lstring is the Lisp string. */ -static inline int -bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos, +static int +bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos, int *disp_prop, struct bidi_string_data *string, + struct window *w, bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars) { int ch; @@ -945,7 +950,7 @@ bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos, if (charpos < endpos && charpos > *disp_pos) { SET_TEXT_POS (pos, charpos, bytepos); - *disp_pos = compute_display_string_pos (&pos, string, frame_window_p, + *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p, disp_prop); } @@ -1050,7 +1055,7 @@ bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos, && *disp_prop) { SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len); - *disp_pos = compute_display_string_pos (&pos, string, frame_window_p, + *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p, disp_prop); } @@ -1089,6 +1094,29 @@ bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos) return val; } +/* If the user has requested the long scans caching, make sure that + BIDI cache is enabled. Otherwise, make sure it's disabled. */ + +static struct region_cache * +bidi_paragraph_cache_on_off (void) +{ + if (NILP (BVAR (current_buffer, cache_long_scans))) + { + if (current_buffer->bidi_paragraph_cache) + { + free_region_cache (current_buffer->bidi_paragraph_cache); + current_buffer->bidi_paragraph_cache = 0; + } + return NULL; + } + else + { + if (!current_buffer->bidi_paragraph_cache) + current_buffer->bidi_paragraph_cache = new_region_cache (); + return current_buffer->bidi_paragraph_cache; + } +} + /* On my 2005-vintage machine, searching back for paragraph start takes ~1 ms per line. And bidi_paragraph_init is called 4 times when user types C-p. The number below limits each call to @@ -1104,7 +1132,8 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte) { Lisp_Object re = paragraph_start_re; ptrdiff_t limit = ZV, limit_byte = ZV_BYTE; - ptrdiff_t n = 0; + struct region_cache *bpc = bidi_paragraph_cache_on_off (); + ptrdiff_t n = 0, oldpos = pos, next; while (pos_byte > BEGV_BYTE && n++ < MAX_PARAGRAPH_SEARCH @@ -1114,11 +1143,22 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte) display string? And what if a display string covering some of the text over which we scan back includes paragraph_start_re? */ - pos = find_next_newline_no_quit (pos - 1, -1); - pos_byte = CHAR_TO_BYTE (pos); + DEC_BOTH (pos, pos_byte); + if (bpc && region_cache_backward (current_buffer, bpc, pos, &next)) + { + pos = next, pos_byte = CHAR_TO_BYTE (pos); + break; + } + else + pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte); } if (n >= MAX_PARAGRAPH_SEARCH) - pos_byte = BEGV_BYTE; + pos = BEGV, pos_byte = BEGV_BYTE; + if (bpc) + know_region_cache (current_buffer, bpc, pos, oldpos); + /* Positions returned by the region cache are not limited to + BEGV..ZV range, so we limit them here. */ + pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE); return pos_byte; } @@ -1228,8 +1268,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) bytepos = pstartbyte; if (!string_p) pos = BYTE_TO_CHAR (bytepos); - ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop, - &bidi_it->string, + ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop, + &bidi_it->string, bidi_it->w, bidi_it->frame_window_p, &ch_len, &nchars); type = bidi_get_type (ch, NEUTRAL_DIR); @@ -1256,8 +1296,8 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) && bidi_at_paragraph_end (pos, bytepos) >= -1) break; /* Fetch next character and advance to get past it. */ - ch = bidi_fetch_char (bytepos, pos, &disp_pos, - &disp_prop, &bidi_it->string, + ch = bidi_fetch_char (pos, bytepos, &disp_pos, + &disp_prop, &bidi_it->string, bidi_it->w, bidi_it->frame_window_p, &ch_len, &nchars); pos += nchars; bytepos += ch_len; @@ -1287,8 +1327,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) /* FXIME: What if p is covered by a display string? See also a FIXME inside bidi_find_paragraph_start. */ - p--; - pbyte = CHAR_TO_BYTE (p); + DEC_BOTH (p, pbyte); prevpbyte = bidi_find_paragraph_start (p, pbyte); } pstartbyte = prevpbyte; @@ -1319,7 +1358,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p) The rest of this file constitutes the core of the UBA implementation. ***********************************************************************/ -static inline bool +static bool bidi_explicit_dir_char (int ch) { bidi_type_t ch_type; @@ -1361,15 +1400,19 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) : bidi_it->string.s); if (bidi_it->charpos < 0) - bidi_it->charpos = 0; - bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos, - bidi_it->string.unibyte); + bidi_it->charpos = bidi_it->bytepos = 0; + eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0, + bidi_it->charpos, + bidi_it->string.unibyte)); } else { if (bidi_it->charpos < BEGV) - bidi_it->charpos = BEGV; - bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos); + { + bidi_it->charpos = BEGV; + bidi_it->bytepos = BEGV_BYTE; + } + eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos)); } } /* Don't move at end of buffer/string. */ @@ -1402,9 +1445,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it) /* Fetch the character at BYTEPOS. If it is covered by a display string, treat the entire run of covered characters as a single character u+FFFC. */ - curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos, + curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos, &bidi_it->disp_pos, &bidi_it->disp_prop, - &bidi_it->string, bidi_it->frame_window_p, + &bidi_it->string, bidi_it->w, + bidi_it->frame_window_p, &bidi_it->ch_len, &bidi_it->nchars); } bidi_it->ch = curchar; @@ -1842,7 +1886,7 @@ bidi_resolve_weak (struct bidi_it *bidi_it) /* Resolve the type of a neutral character according to the type of surrounding strong text and the current embedding level. */ -static inline bidi_type_t +static bidi_type_t bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev) { /* N1: European and Arabic numbers are treated as though they were R. */ @@ -1978,6 +2022,7 @@ bidi_resolve_neutral (struct bidi_it *bidi_it) next_type = STRONG_R; break; case WEAK_BN: + case NEUTRAL_ON: /* W6/Retaining */ if (!bidi_explicit_dir_char (bidi_it->ch)) emacs_abort (); /* can't happen: BNs are skipped */ /* FALLTHROUGH */ @@ -2194,8 +2239,8 @@ bidi_level_of_next_char (struct bidi_it *bidi_it) if (bidi_it->nchars <= 0) emacs_abort (); do { - ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs, - fwp, &clen, &nc); + ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs, + bidi_it->w, fwp, &clen, &nc); if (ch == '\n' || ch == BIDI_EOB) chtype = NEUTRAL_B; else @@ -2396,6 +2441,10 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it) next_level = bidi_peek_at_next_level (bidi_it); while (next_level != expected_next_level) { + /* If next_level is -1, it means we have an unresolved level + in the cache, which at this point should not happen. If + it does, we will infloop. */ + eassert (next_level >= 0); expected_next_level += incr; level_to_search += incr; bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);