as per UAX#9, a part of the Unicode Standard.
Unlike the reference and most other implementations, this one is
- designed to be called once for every character in the buffer.
+ designed to be called once for every character in the buffer or
+ string.
The main entry point is bidi_get_next_char_visually. Each time it
is called, it finds the next character in the visual order, and
more details about its algorithm that finds the next visual-order
character by resolving their levels on the fly.
+ The two other entry points are bidi_paragraph_init and
+ bidi_mirror_char. The first determines the base direction of a
+ paragraph, while the second returns the mirrored version of its
+ argument character.
+
If you want to understand the code, you will have to read it
together with the relevant portions of UAX#9. The comments include
references to UAX#9 rules, for that very reason.
make_number (bidi_type[i].type));
fallback_paragraph_start_re =
- XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value;
+ Fsymbol_value (Fintern_soft (build_string ("paragraph-start"), Qnil));
if (!STRINGP (fallback_paragraph_start_re))
fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$");
staticpro (&fallback_paragraph_start_re);
Qparagraph_start = intern ("paragraph-start");
staticpro (&Qparagraph_start);
fallback_paragraph_separate_re =
- XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value;
+ Fsymbol_value (Fintern_soft (build_string ("paragraph-separate"), Qnil));
if (!STRINGP (fallback_paragraph_separate_re))
fallback_paragraph_separate_re = build_string ("[ \t\f]*$");
staticpro (&fallback_paragraph_separate_re);
/* Return the bidi type of a character CH, subject to the current
directional OVERRIDE. */
-bidi_type_t
+static INLINE bidi_type_t
bidi_get_type (int ch, bidi_dir_t override)
{
bidi_type_t default_type;
}
/* Given a bidi TYPE of a character, return its category. */
-bidi_category_t
+static INLINE bidi_category_t
bidi_get_category (bidi_type_t type)
{
switch (type)
/* Copy the bidi iterator from FROM to TO. To save cycles, this only
copies the part of the level stack that is actually in use. */
-static inline void
+static INLINE void
bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
{
int i;
static int bidi_cache_idx;
static int bidi_cache_last_idx;
-static inline void
+static INLINE void
bidi_cache_reset (void)
{
bidi_cache_idx = 0;
bidi_cache_last_idx = -1;
}
-static inline void
+static INLINE void
bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
level less than or equal to LEVEL.  If LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
-static inline int
+static INLINE int
bidi_cache_search (int charpos, int level, int dir)
{
int i, i_start;
return -1;
}
-static inline void
+static INLINE void
bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
{
int idx;
bidi_cache_idx = idx + 1;
}
-static inline bidi_type_t
+static INLINE bidi_type_t
bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
{
int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
return UNKNOWN_BT;
}
-static inline int
+static INLINE int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
following the buffer position, -1 if position is at the beginning
of a new paragraph, or -2 if position is neither at beginning nor
at end of a paragraph. */
-EMACS_INT
+static EMACS_INT
bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
{
Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate,
embedding levels on either side of the run boundary. Also, update
the saved info about previously seen characters, since that info is
generally valid for a single level run. */
-static inline void
+static INLINE void
bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
{
int higher_level = level_before > level_after ? level_before : level_after;
int ch, ch_len;
EMACS_INT pos;
bidi_type_t type;
- EMACS_INT sep_len;
/* If we are inside a paragraph separator, we are just waiting
for the separator to be exhausted; use the previous paragraph
/* Do whatever UAX#9 clause X8 says should be done at paragraph's
end. */
-static inline void
+static INLINE void
bidi_set_paragraph_end (struct bidi_it *bidi_it)
{
bidi_it->invalid_levels = 0;
bidi_it->new_paragraph = 1;
bidi_it->separator_limit = -1;
bidi_it->type = NEUTRAL_B;
- bidi_it->type_after_w1 = UNKNOWN_BT;
- bidi_it->orig_type = UNKNOWN_BT;
+ bidi_it->type_after_w1 = NEUTRAL_B;
+ bidi_it->orig_type = NEUTRAL_B;
bidi_it->prev_was_pdf = 0;
- bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
+ bidi_it->prev.type = bidi_it->prev.type_after_w1 =
+ bidi_it->prev.orig_type = UNKNOWN_BT;
bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
bidi_it->last_strong.orig_type = UNKNOWN_BT;
bidi_it->next_for_neutral.charpos = -1;
/* Push the current embedding level and override status; reset the
current level to LEVEL and the current override status to OVERRIDE. */
-static inline void
+static INLINE void
bidi_push_embedding_level (struct bidi_it *bidi_it,
int level, bidi_dir_t override)
{
/* Pop the embedding level and directional override status from the
stack, and return the new level. */
-static inline int
+static INLINE int
bidi_pop_embedding_level (struct bidi_it *bidi_it)
{
/* UAX#9 says to ignore invalid PDFs. */
}
/* Record in SAVED_INFO the information about the current character. */
-static inline void
+static INLINE void
bidi_remember_char (struct bidi_saved_info *saved_info,
struct bidi_it *bidi_it)
{
/* Resolve the type of a neutral character according to the type of
surrounding strong text and the current embedding level. */
-static inline bidi_type_t
+static INLINE bidi_type_t
bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
{
/* N1: European and Arabic numbers are treated as though they were R. */
return STRONG_R;
}
-static inline int
+static INLINE int
bidi_explicit_dir_char (int c)
{
/* FIXME: this should be replaced with a lookup table with suitable
if (prev_level < new_level
&& bidi_it->type == WEAK_BN
&& bidi_it->ignore_bn_limit == 0 /* only if not already known */
- && bidi_it->ch != BIDI_EOB /* not already at EOB */
+ && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
&& bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
+ bidi_it->ch_len)))
{
/* Advance in the buffer, resolve weak types and return the type of
the next character after weak type resolution. */
-bidi_type_t
+static bidi_type_t
bidi_resolve_weak (struct bidi_it *bidi_it)
{
bidi_type_t type;
if (type == WEAK_NSM) /* W1 */
{
/* Note that we don't need to consider the case where the
- prev character has its type overridden by an RLO or LRO:
- such characters are outside the current level run, and
- thus not relevant to this NSM. Thus, NSM gets the
- orig_type of the previous character. */
- if (bidi_it->prev.type != UNKNOWN_BT)
- type = bidi_it->prev.orig_type;
+ prev character has its type overridden by an RLO or LRO,
+ because then either the type of this NSM would have been
+ also overridden, or the previous character is outside the
+ current level run, and thus not relevant to this NSM.
+ This is why NSM gets the type_after_w1 of the previous
+ character. */
+ if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
+ /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
+ && bidi_it->prev.type_after_w1 != NEUTRAL_B)
+ type = bidi_it->prev.type_after_w1;
else if (bidi_it->sor == R2L)
type = STRONG_R;
else if (bidi_it->sor == L2R)
return type;
}
-bidi_type_t
+static bidi_type_t
bidi_resolve_neutral (struct bidi_it *bidi_it)
{
int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
/* Given an iterator state in BIDI_IT, advance one character position
in the buffer to the next character (in the logical order), resolve
the bidi type of that next character, and return that type. */
-bidi_type_t
+static bidi_type_t
bidi_type_of_next_char (struct bidi_it *bidi_it)
{
bidi_type_t type;
the buffer to the next character (in the logical order), resolve
the embedding and implicit levels of that next character, and
return the resulting level. */
-int
+static int
bidi_level_of_next_char (struct bidi_it *bidi_it)
{
bidi_type_t type;
if (bidi_it->scan_dir == 1)
{
/* There's no sense in trying to advance if we hit end of text. */
- if (bidi_it->ch == BIDI_EOB)
+ if (bidi_it->bytepos >= ZV_BYTE)
return bidi_it->resolved_level;
/* Record the info about the previous character. */