/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
- Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software
+ Copyright (C) 2000-2001, 2004-2005, 2009-2014 Free Software
Foundation, Inc.
This file is part of GNU Emacs.
#include "character.h"
#include "buffer.h"
#include "dispextern.h"
+#include "region-cache.h"
static bool bidi_initialized = 0;
static Lisp_Object bidi_type_table, bidi_mirror_table;
-#define LRM_CHAR 0x200E
-#define RLM_CHAR 0x200F
-#define BIDI_EOB -1
+#define BIDI_EOB (-1)
/* Data type for describing the bidirectional character categories. */
typedef enum {
UNKNOWN_BC,
NEUTRAL,
WEAK,
- STRONG
+ STRONG,
+ EXPLICIT_FORMATTING
} bidi_category_t;
/* UAX#9 says to search only for L, AL, or R types of characters, and
if (default_type == UNKNOWN_BT)
emacs_abort ();
- if (override == NEUTRAL_DIR)
- return default_type;
-
switch (default_type)
{
- /* Although UAX#9 does not tell, it doesn't make sense to
- override NEUTRAL_B and LRM/RLM characters. */
+ case WEAK_BN:
case NEUTRAL_B:
case LRE:
case LRO:
case RLO:
case PDF:
return default_type;
+ /* FIXME: The isolate controls are treated as BN until we add
+ support for UBA v6.3. */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
+ return WEAK_BN;
default:
- switch (ch)
- {
- case LRM_CHAR:
- case RLM_CHAR:
- return default_type;
- default:
- if (override == L2R) /* X6 */
- return STRONG_L;
- else if (override == R2L)
- return STRONG_R;
- else
- emacs_abort (); /* can't happen: handled above */
- }
+ if (override == L2R)
+ return STRONG_L;
+ else if (override == R2L)
+ return STRONG_R;
+ else
+ return default_type;
}
}
case STRONG_L:
case STRONG_R:
case STRONG_AL:
- case LRE:
- case LRO:
- case RLE:
- case RLO:
return STRONG;
- case PDF: /* ??? really?? */
case WEAK_EN:
case WEAK_ES:
case WEAK_ET:
case WEAK_CS:
case WEAK_NSM:
case WEAK_BN:
+ /* FIXME */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
return WEAK;
case NEUTRAL_B:
case NEUTRAL_S:
case NEUTRAL_WS:
case NEUTRAL_ON:
return NEUTRAL;
+ case LRE:
+ case LRO:
+ case RLE:
+ case RLO:
+ case PDF:
+#if 0
+ /* FIXME: This awaits implementation of isolate support. */
+ case LRI:
+ case RLI:
+ case FSI:
+ case PDI:
+#endif
+ return EXPLICIT_FORMATTING;
default:
emacs_abort ();
}
are zero-based character positions in S, BEGBYTE is byte position
corresponding to BEG. UNIBYTE means S is a unibyte string. */
static ptrdiff_t
-bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
- const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte)
+bidi_count_bytes (const unsigned char *s, ptrdiff_t beg,
+ ptrdiff_t begbyte, ptrdiff_t end, bool unibyte)
{
ptrdiff_t pos = beg;
const unsigned char *p = s + begbyte, *start = p;
return STRING_CHAR (s);
}
-/* Fetch and return the character at BYTEPOS/CHARPOS. If that
+/* Fetch and return the character at CHARPOS/BYTEPOS. If that
character is covered by a display string, treat the entire run of
covered characters as a single character, either u+2029 or u+FFFC,
and return their combined length in CH_LEN and NCHARS. DISP_POS
string to iterate, or NULL if iterating over a buffer or a Lisp
string; in the latter case, STRING->lstring is the Lisp string. */
static int
-bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
+bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
int *disp_prop, struct bidi_string_data *string,
+ struct window *w,
bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
int ch;
if (charpos < endpos && charpos > *disp_pos)
{
SET_TEXT_POS (pos, charpos, bytepos);
- *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
+ *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
}
&& *disp_prop)
{
SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
- *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
+ *disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
}
return val;
}
+/* If the user has requested the long scans caching, make sure that
+ BIDI cache is enabled. Otherwise, make sure it's disabled. */
+
+static struct region_cache *
+bidi_paragraph_cache_on_off (void)
+{
+ struct buffer *cache_buffer = current_buffer;
+ bool indirect_p = false;
+
+ /* For indirect buffers, make sure to use the cache of their base
+ buffer. */
+ if (cache_buffer->base_buffer)
+ {
+ cache_buffer = cache_buffer->base_buffer;
+ indirect_p = true;
+ }
+
+ /* Don't turn on or off the cache in the base buffer, if the value
+ of cache-long-scans of the base buffer is inconsistent with that.
+ This is because doing so will just make the cache pure overhead,
+ since if we turn it on via indirect buffer, it will be
+ immediately turned off by its base buffer. */
+ if (NILP (BVAR (current_buffer, cache_long_scans)))
+ {
+ if (!indirect_p
+ || NILP (BVAR (cache_buffer, cache_long_scans)))
+ {
+ if (cache_buffer->bidi_paragraph_cache)
+ {
+ free_region_cache (cache_buffer->bidi_paragraph_cache);
+ cache_buffer->bidi_paragraph_cache = 0;
+ }
+ }
+ return NULL;
+ }
+ else
+ {
+ if (!indirect_p
+ || !NILP (BVAR (cache_buffer, cache_long_scans)))
+ {
+ if (!cache_buffer->bidi_paragraph_cache)
+ cache_buffer->bidi_paragraph_cache = new_region_cache ();
+ }
+ return cache_buffer->bidi_paragraph_cache;
+ }
+}
+
/* On my 2005-vintage machine, searching back for paragraph start
takes ~1 ms per line. And bidi_paragraph_init is called 4 times
when user types C-p. The number below limits each call to
{
Lisp_Object re = paragraph_start_re;
ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
- ptrdiff_t n = 0;
+ struct region_cache *bpc = bidi_paragraph_cache_on_off ();
+ ptrdiff_t n = 0, oldpos = pos, next;
+ struct buffer *cache_buffer = current_buffer;
+
+ if (cache_buffer->base_buffer)
+ cache_buffer = cache_buffer->base_buffer;
while (pos_byte > BEGV_BYTE
&& n++ < MAX_PARAGRAPH_SEARCH
display string? And what if a display string covering some
of the text over which we scan back includes
paragraph_start_re? */
- pos = find_next_newline_no_quit (pos - 1, -1);
- pos_byte = CHAR_TO_BYTE (pos);
+ DEC_BOTH (pos, pos_byte);
+ if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
+ {
+ pos = next, pos_byte = CHAR_TO_BYTE (pos);
+ break;
+ }
+ else
+ pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
}
if (n >= MAX_PARAGRAPH_SEARCH)
- pos_byte = BEGV_BYTE;
+ pos = BEGV, pos_byte = BEGV_BYTE;
+ if (bpc)
+ know_region_cache (cache_buffer, bpc, pos, oldpos);
+ /* Positions returned by the region cache are not limited to
+ BEGV..ZV range, so we limit them here. */
+ pos_byte = clip_to_bounds (BEGV_BYTE, pos_byte, ZV_BYTE);
return pos_byte;
}
bytepos = pstartbyte;
if (!string_p)
pos = BYTE_TO_CHAR (bytepos);
- ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
- &bidi_it->string,
+ ch = bidi_fetch_char (pos, bytepos, &disp_pos, &disp_prop,
+ &bidi_it->string, bidi_it->w,
bidi_it->frame_window_p, &ch_len, &nchars);
type = bidi_get_type (ch, NEUTRAL_DIR);
&& bidi_at_paragraph_end (pos, bytepos) >= -1)
break;
/* Fetch next character and advance to get past it. */
- ch = bidi_fetch_char (bytepos, pos, &disp_pos,
- &disp_prop, &bidi_it->string,
+ ch = bidi_fetch_char (pos, bytepos, &disp_pos,
+ &disp_prop, &bidi_it->string, bidi_it->w,
bidi_it->frame_window_p, &ch_len, &nchars);
pos += nchars;
bytepos += ch_len;
/* FXIME: What if p is covered by a display
string? See also a FIXME inside
bidi_find_paragraph_start. */
- p--;
- pbyte = CHAR_TO_BYTE (p);
+ DEC_BOTH (p, pbyte);
prevpbyte = bidi_find_paragraph_start (p, pbyte);
}
pstartbyte = prevpbyte;
: bidi_it->string.s);
if (bidi_it->charpos < 0)
- bidi_it->charpos = 0;
- bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
- bidi_it->string.unibyte);
+ bidi_it->charpos = bidi_it->bytepos = 0;
+ eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
+ bidi_it->charpos,
+ bidi_it->string.unibyte));
}
else
{
if (bidi_it->charpos < BEGV)
- bidi_it->charpos = BEGV;
- bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
+ {
+ bidi_it->charpos = BEGV;
+ bidi_it->bytepos = BEGV_BYTE;
+ }
+ eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
}
}
/* Don't move at end of buffer/string. */
/* Fetch the character at BYTEPOS. If it is covered by a
display string, treat the entire run of covered characters as
a single character u+FFFC. */
- curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
+ curchar = bidi_fetch_char (bidi_it->charpos, bidi_it->bytepos,
&bidi_it->disp_pos, &bidi_it->disp_prop,
- &bidi_it->string, bidi_it->frame_window_p,
+ &bidi_it->string, bidi_it->w,
+ bidi_it->frame_window_p,
&bidi_it->ch_len, &bidi_it->nchars);
}
bidi_it->ch = curchar;
next_type = STRONG_R;
break;
case WEAK_BN:
+ case NEUTRAL_ON: /* W6/Retaining */
if (!bidi_explicit_dir_char (bidi_it->ch))
emacs_abort (); /* can't happen: BNs are skipped */
/* FALLTHROUGH */
if (bidi_it->nchars <= 0)
emacs_abort ();
do {
- ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
- fwp, &clen, &nc);
+ ch = bidi_fetch_char (cpos += nc, bpos += clen, &disp_pos, &dpp, &bs,
+ bidi_it->w, fwp, &clen, &nc);
if (ch == '\n' || ch == BIDI_EOB)
chtype = NEUTRAL_B;
else
next_level = bidi_peek_at_next_level (bidi_it);
while (next_level != expected_next_level)
{
+ /* If next_level is -1, it means we have an unresolved level
+ in the cache, which at this point should not happen. If
+ it does, we will infloop. */
+ eassert (next_level >= 0);
expected_next_level += incr;
level_to_search += incr;
bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);