/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
- Copyright (C) 2000-2001, 2004-2005, 2009-2012
- Free Software Foundation, Inc.
+ Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software
+ Foundation, Inc.
This file is part of GNU Emacs.
#include "character.h"
#include "buffer.h"
#include "dispextern.h"
+#include "region-cache.h"
static bool bidi_initialized = 0;
static Lisp_Object bidi_type_table, bidi_mirror_table;
-#define LRM_CHAR 0x200E
-#define RLM_CHAR 0x200F
-#define BIDI_EOB -1
+#define BIDI_EOB (-1)
/* Data type for describing the bidirectional character categories. */
typedef enum {
UNKNOWN_BC,
NEUTRAL,
WEAK,
- STRONG
+ STRONG,
+ EXPLICIT_FORMATTING
} bidi_category_t;
/* UAX#9 says to search only for L, AL, or R types of characters, and
/* Return the bidi type of a character CH, subject to the current
directional OVERRIDE. */
-static inline bidi_type_t
+static bidi_type_t
bidi_get_type (int ch, bidi_dir_t override)
{
bidi_type_t default_type;
if (default_type == UNKNOWN_BT)
emacs_abort ();
- if (override == NEUTRAL_DIR)
- return default_type;
-
switch (default_type)
{
- /* Although UAX#9 does not tell, it doesn't make sense to
- override NEUTRAL_B and LRM/RLM characters. */
+ case WEAK_BN:
case NEUTRAL_B:
case LRE:
case LRO:
case RLO:
case PDF:
return default_type;
+ /* FIXME: The isolate controls are treated as BN until we add
+ support for UBA v6.3. */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
+ return WEAK_BN;
default:
- switch (ch)
- {
- case LRM_CHAR:
- case RLM_CHAR:
- return default_type;
- default:
- if (override == L2R) /* X6 */
- return STRONG_L;
- else if (override == R2L)
- return STRONG_R;
- else
- emacs_abort (); /* can't happen: handled above */
- }
+ if (override == L2R)
+ return STRONG_L;
+ else if (override == R2L)
+ return STRONG_R;
+ else
+ return default_type;
}
}
-static inline void
+static void
bidi_check_type (bidi_type_t type)
{
eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
}
/* Given a bidi TYPE of a character, return its category. */
-static inline bidi_category_t
+static bidi_category_t
bidi_get_category (bidi_type_t type)
{
switch (type)
case STRONG_L:
case STRONG_R:
case STRONG_AL:
- case LRE:
- case LRO:
- case RLE:
- case RLO:
return STRONG;
- case PDF: /* ??? really?? */
case WEAK_EN:
case WEAK_ES:
case WEAK_ET:
case WEAK_CS:
case WEAK_NSM:
case WEAK_BN:
+ /* FIXME */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
return WEAK;
case NEUTRAL_B:
case NEUTRAL_S:
case NEUTRAL_WS:
case NEUTRAL_ON:
return NEUTRAL;
+ case LRE:
+ case LRO:
+ case RLE:
+ case RLO:
+ case PDF:
+#if 0
+ /* FIXME: This awaits implementation of isolate support. */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
+#endif
+ return EXPLICIT_FORMATTING;
default:
emacs_abort ();
}
embedding levels on either side of the run boundary. Also, update
the saved info about previously seen characters, since that info is
generally valid for a single level run. */
-static inline void
+static void
bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
{
int higher_level = (level_before > level_after ? level_before : level_after);
/* Push the current embedding level and override status; reset the
current level to LEVEL and the current override status to OVERRIDE. */
-static inline void
+static void
bidi_push_embedding_level (struct bidi_it *bidi_it,
int level, bidi_dir_t override)
{
/* Pop the embedding level and directional override status from the
stack, and return the new level. */
-static inline int
+static int
bidi_pop_embedding_level (struct bidi_it *bidi_it)
{
/* UAX#9 says to ignore invalid PDFs. */
}
/* Record in SAVED_INFO the information about the current character. */
-static inline void
+static void
bidi_remember_char (struct bidi_saved_info *saved_info,
struct bidi_it *bidi_it)
{
/* Copy the bidi iterator from FROM to TO. To save cycles, this only
copies the part of the level stack that is actually in use. */
-static inline void
+static void
bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
{
/* Copy everything from the start through the active part of
intact. This is called when the cached information is no more
useful for the current iteration, e.g. when we were reseated to a
new position on the same object. */
-static inline void
+static void
bidi_cache_reset (void)
{
bidi_cache_idx = bidi_cache_start;
iterator for reordering a buffer or a string that does not come
from display properties, because that means all the previously
cached info is of no further use. */
-static inline void
+static void
bidi_cache_shrink (void)
{
if (bidi_cache_size > BIDI_CACHE_CHUNK)
bidi_cache_reset ();
}
-static inline void
+static void
bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
level less or equal to LEVEL. if LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
-static inline ptrdiff_t
+static ptrdiff_t
bidi_cache_search (ptrdiff_t charpos, int level, int dir)
{
ptrdiff_t i, i_start;
return -1;
}
-static inline void
+static void
bidi_cache_ensure_space (ptrdiff_t idx)
{
/* Enlarge the cache as needed. */
}
}
-static inline void
+static void
bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
{
ptrdiff_t idx;
bidi_cache_idx = idx + 1;
}
-static inline bidi_type_t
+static bidi_type_t
bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
{
ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
return UNKNOWN_BT;
}
-static inline int
+static int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
/* Do whatever UAX#9 clause X8 says should be done at paragraph's
end. */
-static inline void
+static void
bidi_set_paragraph_end (struct bidi_it *bidi_it)
{
bidi_it->invalid_levels = 0;
/* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
are zero-based character positions in S, BEGBYTE is byte position
corresponding to BEG. UNIBYTE means S is a unibyte string. */
-static inline ptrdiff_t
-bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
- const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte)
+static ptrdiff_t
+bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
+ ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
{
ptrdiff_t pos = beg;
const unsigned char *p = s + begbyte, *start = p;
non-NULL, fetch the character from string S; otherwise fetch the
character from the current buffer. UNIBYTE means S is a
unibyte string. */
-static inline int
+static int
bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
{
if (s)
return STRING_CHAR (s);
}
-/* Fetch and return the character at BYTEPOS/CHARPOS. If that
+/* Fetch and return the character at CHARPOS/BYTEPOS. If that
character is covered by a display string, treat the entire run of
covered characters as a single character, either u+2029 or u+FFFC,
and return their combined length in CH_LEN and NCHARS. DISP_POS
u+2029 to handle it as a paragraph separator. STRING->s is the C
string to iterate, or NULL if iterating over a buffer or a Lisp
string; in the latter case, STRING->lstring is the Lisp string. */
-static inline int
-bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
+static int
+bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
int *disp_prop, struct bidi_string_data *string,
+ struct window *w,
bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
int ch;
if (charpos < endpos && charpos > *disp_pos)
{
SET_TEXT_POS (pos, charpos, bytepos);
- *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
+ *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
}
&& *disp_prop)
{
SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
- *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
+ *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
}
return val;
}
+/* If the user has requested the long scans caching, make sure that
+ BIDI cache is enabled. Otherwise, make sure it's disabled. */
+
+static struct region_cache *
+bidi_paragraph_cache_on_off (void)
+{
+ if (NILP (BVAR (current_buffer, cache_long_scans)))
+ {
+ if (current_buffer->bidi_paragraph_cache)
+ {
+ free_region_cache (current_buffer->bidi_paragraph_cache);
+ current_buffer->bidi_paragraph_cache = 0;
+ }
+ return NULL;
+ }
+ else
+ {
+ if (!current_buffer->bidi_paragraph_cache)
+ current_buffer->bidi_paragraph_cache = new_region_cache ();
+ return current_buffer->bidi_paragraph_cache;
+ }
+}
+
/* On my 2005-vintage machine, searching back for paragraph start
takes ~1 ms per line. And bidi_paragraph_init is called 4 times
when user types C-p. The number below limits each call to
{
Lisp_Object re = paragraph_start_re;
ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
- ptrdiff_t n = 0;
+ struct region_cache *bpc = bidi_paragraph_cache_on_off ();
+ ptrdiff_t n = 0, oldpos = pos, next;
while (pos_byte > BEGV_BYTE
&& n++ < MAX_PARAGRAPH_SEARCH
display string? And what if a display string covering some
of the text over which we scan back includes
paragraph_start_re? */
- pos = find_next_newline_no_quit (pos - 1, -1);
- pos_byte = CHAR_TO_BYTE (pos);
+ DEC_BOTH (pos, pos_byte);
+ if (bpc && region_cache_backward (current_buffer, bpc, pos, &next))
+ {
+ pos = next, pos_byte = CHAR_TO_BYTE (pos);
+ break;
+ }
+ else
+ pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
}
if (n >= MAX_PARAGRAPH_SEARCH)
- pos_byte = BEGV_BYTE;
+ pos = BEGV, pos_byte = BEGV_BYTE;
+ if (bpc)
+ know_region_cache (current_buffer, bpc, pos, oldpos);
+ /* Positions returned by the region cache are not limited to
+ BEGV..ZV range, so we limit them here. */
+ pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
return pos_byte;
}
bytepos = pstartbyte;
if (!string_p)
pos = BYTE_TO_CHAR (bytepos);
- ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
- &bidi_it->string,
+ ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop,
+ &bidi_it->string, bidi_it->w,
bidi_it->frame_window_p, &ch_len, &nchars);
type = bidi_get_type (ch, NEUTRAL_DIR);
&& bidi_at_paragraph_end (pos, bytepos) >= -1)
break;
/* Fetch next character and advance to get past it. */
- ch = bidi_fetch_char (bytepos, pos, &disp_pos,
- &disp_prop, &bidi_it->string,
+ ch = bidi_fetch_char (pos, bytepos, &disp_pos,
+ &disp_prop, &bidi_it->string, bidi_it->w,
bidi_it->frame_window_p, &ch_len, &nchars);
pos += nchars;
bytepos += ch_len;
/* FXIME: What if p is covered by a display
string? See also a FIXME inside
bidi_find_paragraph_start. */
- p--;
- pbyte = CHAR_TO_BYTE (p);
+ DEC_BOTH (p, pbyte);
prevpbyte = bidi_find_paragraph_start (p, pbyte);
}
pstartbyte = prevpbyte;
The rest of this file constitutes the core of the UBA implementation.
***********************************************************************/
-static inline bool
+static bool
bidi_explicit_dir_char (int ch)
{
bidi_type_t ch_type;
: bidi_it->string.s);
if (bidi_it->charpos < 0)
- bidi_it->charpos = 0;
- bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
- bidi_it->string.unibyte);
+ bidi_it->charpos = bidi_it->bytepos = 0;
+ eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
+ bidi_it->charpos,
+ bidi_it->string.unibyte));
}
else
{
if (bidi_it->charpos < BEGV)
- bidi_it->charpos = BEGV;
- bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
+ {
+ bidi_it->charpos = BEGV;
+ bidi_it->bytepos = BEGV_BYTE;
+ }
+ eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
}
}
/* Don't move at end of buffer/string. */
/* Fetch the character at BYTEPOS. If it is covered by a
display string, treat the entire run of covered characters as
a single character u+FFFC. */
- curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
+ curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
&bidi_it->disp_pos, &bidi_it->disp_prop,
- &bidi_it->string, bidi_it->frame_window_p,
+ &bidi_it->string, bidi_it->w,
+ bidi_it->frame_window_p,
&bidi_it->ch_len, &bidi_it->nchars);
}
bidi_it->ch = curchar;
/* Resolve the type of a neutral character according to the type of
surrounding strong text and the current embedding level. */
-static inline bidi_type_t
+static bidi_type_t
bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
{
/* N1: European and Arabic numbers are treated as though they were R. */
next_type = STRONG_R;
break;
case WEAK_BN:
+ case NEUTRAL_ON: /* W6/Retaining */
if (!bidi_explicit_dir_char (bidi_it->ch))
emacs_abort (); /* can't happen: BNs are skipped */
/* FALLTHROUGH */
if (bidi_it->nchars <= 0)
emacs_abort ();
do {
- ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
- fwp, &clen, &nc);
+ ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
+ bidi_it->w, fwp, &clen, &nc);
if (ch == '\n' || ch == BIDI_EOB)
chtype = NEUTRAL_B;
else
next_level = bidi_peek_at_next_level (bidi_it);
while (next_level != expected_next_level)
{
+ /* If next_level is -1, it means we have an unresolved level
+ in the cache, which at this point should not happen. If
+ it does, we will infloop. */
+ eassert (next_level >= 0);
expected_next_level += incr;
level_to_search += incr;
bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);