details about its algorithm that finds the next visual-order
character by resolving their levels on the fly.
- The two other entry points are bidi_paragraph_init and
+ Two other entry points are bidi_paragraph_init and
bidi_mirror_char. The first determines the base direction of a
paragraph, while the second returns the mirrored version of its
argument character.
+ A few auxiliary entry points are used to initialize the bidi
+ iterator for iterating an object (buffer or string), push and pop
+ the bidi iterator state, and save and restore the state of the bidi
+ cache.
+
If you want to understand the code, you will have to read it
together with the relevant portions of UAX#9. The comments include
references to UAX#9 rules, for that very reason.
int level, bidi_dir_t override)
{
bidi_it->stack_idx++;
- if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
- abort ();
+ xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
bidi_it->level_stack[bidi_it->stack_idx].level = level;
bidi_it->level_stack[bidi_it->stack_idx].override = override;
}
#define BIDI_CACHE_CHUNK 200
static struct bidi_it *bidi_cache;
-static size_t bidi_cache_size = 0;
-static size_t elsz = sizeof (struct bidi_it);
+static EMACS_INT bidi_cache_size = 0;
+enum { elsz = sizeof (struct bidi_it) };
static EMACS_INT bidi_cache_idx; /* next unused cache slot */
static EMACS_INT bidi_cache_last_idx; /* slot of last cache hit */
static EMACS_INT bidi_cache_start = 0; /* start of cache for this
}
static inline void
-bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
+bidi_cache_fetch_state (EMACS_INT idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
level less or equal to LEVEL. if LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
-static inline int
+static inline EMACS_INT
bidi_cache_search (EMACS_INT charpos, int level, int dir)
{
- int i, i_start;
+ EMACS_INT i, i_start;
- if (bidi_cache_idx)
+ if (bidi_cache_idx > bidi_cache_start)
{
+ if (bidi_cache_last_idx == -1)
+ bidi_cache_last_idx = bidi_cache_idx - 1;
if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
{
dir = -1;
C, searching backwards (DIR = -1) for LEVEL = 2 will return the
index of slot B or A, depending whether BEFORE is, respectively,
non-zero or zero. */
-static int
+static EMACS_INT
bidi_cache_find_level_change (int level, int dir, int before)
{
if (bidi_cache_idx)
{
- int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
+ EMACS_INT i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
int incr = before ? 1 : 0;
+ xassert (!dir || bidi_cache_last_idx >= 0);
+
if (!dir)
dir = -1;
else if (!incr)
}
static inline void
-bidi_cache_ensure_space (int idx)
+bidi_cache_ensure_space (EMACS_INT idx)
{
/* Enlarge the cache as needed. */
if (idx >= bidi_cache_size)
{
- bidi_cache_size += BIDI_CACHE_CHUNK;
+ while (idx >= bidi_cache_size)
+ bidi_cache_size += BIDI_CACHE_CHUNK;
bidi_cache =
(struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
}
static inline void
bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
{
- int idx;
+ EMACS_INT idx;
/* We should never cache on backward scans. */
if (bidi_it->scan_dir == -1)
if (idx > bidi_cache_start &&
(bidi_it->charpos > (bidi_cache[idx - 1].charpos
+ bidi_cache[idx - 1].nchars)
- || bidi_it->charpos < bidi_cache[0].charpos))
+ || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
{
bidi_cache_reset ();
idx = bidi_cache_start;
static inline bidi_type_t
bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
{
- int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
+ EMACS_INT i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
if (i >= bidi_cache_start)
{
/***********************************************************************
Pushing and popping the bidi iterator state
***********************************************************************/
-/* 10-slot stack for saving the start of the previous level of the
- cache. xdisp.c maintains a 5-slot cache for its iterator state,
- and we need just a little bit more. */
-#define CACHE_STACK_SIZE 10
-static int bidi_cache_start_stack[CACHE_STACK_SIZE];
+/* 5-slot stack for saving the start of the previous level of the
+ cache. xdisp.c maintains a 5-slot stack for its iterator state,
+ and we need the same size of our stack. */
+static EMACS_INT bidi_cache_start_stack[IT_STACK_SIZE];
static int bidi_cache_sp;
/* Push the bidi iterator state in preparation for reordering a
different object, e.g. display string found at certain buffer
- position. Pushing the bidi iterator boils to saving its entire
- state on the cache and starting a new cache "stacked" on top of the
- current cache. */
+ position. Pushing the bidi iterator boils down to saving its
+ entire state on the cache and starting a new cache "stacked" on top
+ of the current cache. */
void
bidi_push_it (struct bidi_it *bidi_it)
{
memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
/* Push the current cache start onto the stack. */
- if (bidi_cache_sp >= CACHE_STACK_SIZE)
- abort ();
+ xassert (bidi_cache_sp < IT_STACK_SIZE);
bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
/* Start a new level of cache, and make it empty. */
bidi_cache_last_idx = -1;
}
+/* Stash away a copy of the cache and its control variables. */
+void *
+bidi_shelve_cache (void)
+{
+ unsigned char *databuf;
+
+ if (bidi_cache_idx == 0)
+ return NULL;
+
+ databuf = xmalloc (sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack)
+ + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
+ + sizeof (bidi_cache_last_idx));
+ memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
+ memcpy (databuf + sizeof (bidi_cache_idx),
+ bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
+ memcpy (databuf + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it),
+ bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
+ memcpy (databuf + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack),
+ &bidi_cache_sp, sizeof (bidi_cache_sp));
+ memcpy (databuf + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
+ &bidi_cache_start, sizeof (bidi_cache_start));
+ memcpy (databuf + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ + sizeof (bidi_cache_start),
+ &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
+
+ return databuf;
+}
+
+/* Restore the cache state from a copy stashed away by bidi_shelve_cache. */
+void
+bidi_unshelve_cache (void *databuf)
+{
+ unsigned char *p = databuf;
+
+ if (!p)
+ {
+ /* A NULL pointer means an empty cache. */
+ bidi_cache_start = 0;
+ bidi_cache_sp = 0;
+ bidi_cache_reset ();
+ }
+ else
+ {
+ memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
+ bidi_cache_ensure_space (bidi_cache_idx);
+ memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
+ bidi_cache_idx * sizeof (struct bidi_it));
+ memcpy (bidi_cache_start_stack,
+ p + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it),
+ sizeof (bidi_cache_start_stack));
+ memcpy (&bidi_cache_sp,
+ p + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack),
+ sizeof (bidi_cache_sp));
+ memcpy (&bidi_cache_start,
+ p + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
+ sizeof (bidi_cache_start));
+ memcpy (&bidi_cache_last_idx,
+ p + sizeof (bidi_cache_idx)
+ + bidi_cache_idx * sizeof (struct bidi_it)
+ + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ + sizeof (bidi_cache_start),
+ sizeof (bidi_cache_last_idx));
+
+ xfree (p);
+ }
+}
+
\f
/***********************************************************************
Initialization
Fetching characters
***********************************************************************/
-/* Count bytes in multibyte string S between BEG/BEGBYTE and END. BEG
- and END are zero-based character positions in S, BEGBYTE is byte
- position corresponding to BEG. */
+/* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
+ are zero-based character positions in S, BEGBYTE is byte position
+ corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
+ string. */
static inline EMACS_INT
bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
- const EMACS_INT begbyte, const EMACS_INT end)
+ const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
{
EMACS_INT pos = beg;
const unsigned char *p = s + begbyte, *start = p;
- if (!CHAR_HEAD_P (*p))
- abort ();
-
- while (pos < end)
+ if (unibyte)
+ p = s + end;
+ else
{
- p += BYTES_BY_CHAR_HEAD (*p);
- pos++;
+ if (!CHAR_HEAD_P (*p))
+ abort ();
+
+ while (pos < end)
+ {
+ p += BYTES_BY_CHAR_HEAD (*p);
+ pos++;
+ }
}
return p - start;
/* Fetch and returns the character at byte position BYTEPOS. If S is
non-NULL, fetch the character from string S; otherwise fetch the
- character from the current buffer. */
+ character from the current buffer. UNIBYTE non-zero means S is a
+ unibyte string. */
static inline int
-bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s)
+bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
{
if (s)
- return STRING_CHAR (s + bytepos);
+ {
+ if (unibyte)
+ return s[bytepos];
+ else
+ return STRING_CHAR (s + bytepos);
+ }
else
return FETCH_MULTIBYTE_CHAR (bytepos);
}
ch = 0xFFFC;
disp_end_pos = compute_display_string_end (*disp_pos, string);
*nchars = disp_end_pos - *disp_pos;
+ if (*nchars <= 0)
+ abort ();
if (string->s)
*ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
- disp_end_pos);
+ disp_end_pos, string->unibyte);
else if (STRINGP (string->lstring))
*ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
- bytepos, disp_end_pos);
+ bytepos, disp_end_pos, string->unibyte);
else
*ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
}
{
if (string->s)
{
- EMACS_INT len;
+ int len;
- ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
- *ch_len = len;
+ if (!string->unibyte)
+ {
+ ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
+ *ch_len = len;
+ }
+ else
+ {
+ ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
+ *ch_len = 1;
+ }
}
else if (STRINGP (string->lstring))
{
- EMACS_INT len;
+ int len;
- ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos, len);
- *ch_len = len;
+ if (!string->unibyte)
+ {
+ ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
+ len);
+ *ch_len = len;
+ }
+ else
+ {
+ ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
+ *ch_len = 1;
+ }
}
else
{
pos = bidi_it->charpos;
s = STRINGP (bidi_it->string.lstring) ?
SDATA (bidi_it->string.lstring) : bidi_it->string.s;
- if (bytepos > begbyte && bidi_char_at_pos (bytepos, s) == '\n')
+ if (bytepos > begbyte
+ && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
{
bytepos++;
pos++;
|| type == LRE || type == LRO));
type = bidi_get_type (ch, NEUTRAL_DIR))
{
- if (!string_p
- && type == NEUTRAL_B
- && bidi_at_paragraph_end (pos, bytepos) >= -1)
- break;
if (pos >= end)
{
/* Pretend there's a paragraph separator at end of
type = NEUTRAL_B;
break;
}
+ if (!string_p
+ && type == NEUTRAL_B
+ && bidi_at_paragraph_end (pos, bytepos) >= -1)
+ break;
/* Fetch next character and advance to get past it. */
ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
bidi_it->frame_window_p, &ch_len, &nchars);
if (bidi_it->charpos < 0)
bidi_it->charpos = 0;
- bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos);
+ bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
+ bidi_it->string.unibyte);
}
else
{
&& bidi_it->ignore_bn_limit == -1 /* only if not already known */
&& bidi_it->charpos < eob /* not already at EOB */
&& bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
- + bidi_it->ch_len, s)))
+ + bidi_it->ch_len, s,
+ bidi_it->string.unibyte)))
{
/* Avoid pushing and popping embedding levels if the level run
is empty, as this breaks level runs where it shouldn't.
bidi_copy_it (&saved_it, bidi_it);
while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
- + bidi_it->ch_len, s)))
+ + bidi_it->ch_len, s,
+ bidi_it->string.unibyte)))
{
/* This advances to the next character, skipping any
characters covered by display strings. */
next_char =
bidi_it->charpos + bidi_it->nchars >= eob
? BIDI_EOB
- : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s);
+ : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
+ bidi_it->string.unibyte);
type_of_next = bidi_get_type (next_char, override);
if (type_of_next == WEAK_BN
next_char =
bidi_it->charpos + bidi_it->nchars >= eob
? BIDI_EOB
- : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s);
+ : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
+ bidi_it->string.unibyte);
type_of_next = bidi_get_type (next_char, override);
if (type_of_next == WEAK_ET
bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
{
int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
- int idx;
+ EMACS_INT idx;
/* Try the cache first. */
if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
}
/* The code below can call eval, and thus cause GC. If we are
- iterating a Lisp string, make sure it won't GCed. */
+ iterating a Lisp string, make sure it won't be GCed. */
if (STRINGP (bidi_it->string.lstring))
GCPRO1 (bidi_it->string.lstring);
fprintf (stderr, "The cache is empty.\n");
return;
}
- fprintf (stderr, "Total of %d state%s in cache:\n",
+ fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)