/* Low-level bidirectional buffer-scanning functions for GNU Emacs.
- Copyright (C) 2000, 2001, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2000, 2001, 2004, 2005, 2009, 2010
+ Free Software Foundation, Inc.
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Eli Zaretskii <eliz@gnu.org>.
as per UAX#9, a part of the Unicode Standard.
Unlike the reference and most other implementations, this one is
- designed to be called once for every character in the buffer.
+ designed to be called once for every character in the buffer or
+ string.
The main entry point is bidi_get_next_char_visually. Each time it
is called, it finds the next character in the visual order, and
more details about its algorithm that finds the next visual-order
character by resolving their levels on the fly.
+ The two other entry points are bidi_paragraph_init and
+ bidi_mirror_char. The first determines the base direction of a
+ paragraph, while the second returns the mirrored version of its
+ argument character.
+
If you want to understand the code, you will have to read it
together with the relevant portions of UAX#9. The comments include
references to UAX#9 rules, for that very reason.
#include <string.h>
#endif
+#include <setjmp.h>
+
#include "lisp.h"
#include "buffer.h"
#include "character.h"
make_number (bidi_type[i].type));
fallback_paragraph_start_re =
- XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value;
+ Fsymbol_value (Fintern_soft (build_string ("paragraph-start"), Qnil));
if (!STRINGP (fallback_paragraph_start_re))
fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$");
staticpro (&fallback_paragraph_start_re);
Qparagraph_start = intern ("paragraph-start");
staticpro (&Qparagraph_start);
fallback_paragraph_separate_re =
- XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value;
+ Fsymbol_value (Fintern_soft (build_string ("paragraph-separate"), Qnil));
if (!STRINGP (fallback_paragraph_separate_re))
fallback_paragraph_separate_re = build_string ("[ \t\f]*$");
staticpro (&fallback_paragraph_separate_re);
/* Return the bidi type of a character CH, subject to the current
directional OVERRIDE. */
-bidi_type_t
+static INLINE bidi_type_t
bidi_get_type (int ch, bidi_dir_t override)
{
bidi_type_t default_type;
}
/* Given a bidi TYPE of a character, return its category. */
-bidi_category_t
+static INLINE bidi_category_t
bidi_get_category (bidi_type_t type)
{
switch (type)
bidi_mirror_char (int c)
{
static const char mirrored_pairs[] = "()<>[]{}";
- const char *p = strchr (mirrored_pairs, c);
+ const char *p = c > 0 && c < 128 ? strchr (mirrored_pairs, c) : NULL;
if (p)
{
size_t i = p - mirrored_pairs;
- if ((i & 1) == 0)
- return mirrored_pairs[i + 1];
- else
- return mirrored_pairs[i - 1];
+ return mirrored_pairs [(i ^ 1)];
}
return c;
}
/* Copy the bidi iterator from FROM to TO. To save cycles, this only
copies the part of the level stack that is actually in use. */
-static inline void
+static INLINE void
bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
{
- int save_first_elt = to->first_elt;
int i;
- /* Copy everything except the level stack. */
- memcpy (to, from, ((int)&((struct bidi_it *)0)->level_stack[0]));
- to->first_elt = save_first_elt;
- if (to->first_elt != 0 && to->first_elt != 1)
- to->first_elt = 0;
+ /* Copy everything except the level stack and beyond. */
+ memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
/* Copy the active part of the level stack. */
to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
static int bidi_cache_idx;
static int bidi_cache_last_idx;
/* Reset the cache bookkeeping: the next iterator state to be cached
   will go into slot zero, and -1 records that there is no last cache
   hit to search from.  The entries of bidi_cache themselves are not
   cleared here.  */
-static inline void
+static INLINE void
bidi_cache_reset (void)
{
bidi_cache_idx = 0;
bidi_cache_last_idx = -1;
}
-static inline void
+static INLINE void
bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
{
int current_scan_dir = bidi_it->scan_dir;
level less than or equal to LEVEL. If LEVEL is -1, disregard the
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
-static inline int
+static INLINE int
bidi_cache_search (int charpos, int level, int dir)
{
int i, i_start;
return -1;
}
-static inline void
+static INLINE void
bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
{
int idx;
if (idx < 0)
{
idx = bidi_cache_idx;
+ /* Don't overrun the cache limit. */
if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
abort ();
+ /* Character positions should correspond to cache positions 1:1.
+ If we are outside the range of cached positions, the cache is
+ useless and must be reset. */
+ if (idx > 0 &&
+ (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
+ || bidi_it->charpos < bidi_cache[0].charpos))
+ {
+ bidi_cache_reset ();
+ idx = 0;
+ }
bidi_copy_it (&bidi_cache[idx], bidi_it);
if (!resolved)
bidi_cache[idx].resolved_level = -1;
+ bidi_cache[idx].new_paragraph = 0;
}
else
{
bidi_cache_idx = idx + 1;
}
-static inline bidi_type_t
+static INLINE bidi_type_t
bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
{
int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
{
bidi_dir_t current_scan_dir = bidi_it->scan_dir;
- *bidi_it = bidi_cache[i];
+ bidi_copy_it (bidi_it, &bidi_cache[i]);
bidi_cache_last_idx = i;
/* Don't let scan direction from the cached state override
the current scan direction. */
return UNKNOWN_BT;
}
-static inline int
+static INLINE int
bidi_peek_at_next_level (struct bidi_it *bidi_it)
{
if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
following the buffer position, -1 if position is at the beginning
of a new paragraph, or -2 if position is neither at beginning nor
at end of a paragraph. */
-EMACS_INT
+static EMACS_INT
bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
{
Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate,
embedding levels on either side of the run boundary. Also, update
the saved info about previously seen characters, since that info is
generally valid for a single level run. */
-static inline void
+static INLINE void
bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
{
int higher_level = level_before > level_after ? level_before : level_after;
bidi_it->invalid_rl_levels = -1;
bidi_it->next_en_pos = -1;
bidi_it->next_for_ws.type = UNKNOWN_BT;
- bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir,
+ bidi_set_sor_type (bidi_it,
+ bidi_it->paragraph_dir == R2L ? 1 : 0,
bidi_it->level_stack[0].level); /* X10 */
bidi_cache_reset ();
/* Find the beginning of this paragraph by looking back in the buffer.
Value is the byte position of the paragraph's beginning. */
static EMACS_INT
-bidi_find_paragraph_start (struct bidi_it *bidi_it)
+bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
{
Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ());
- EMACS_INT pos = bidi_it->charpos;
- EMACS_INT pos_byte = bidi_it->bytepos;
EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
if (!STRINGP (re))
}
/* Determine the direction, a.k.a. base embedding level, of the
- paragraph we are about to iterate through. */
+ paragraph we are about to iterate through. If DIR is either L2R or
+ R2L, just use that. Otherwise, determine the paragraph direction
+ from the first strong character of the paragraph.
+
+ Note that this gives the paragraph separator the same direction as
+ the preceding paragraph, even though Emacs generally views the
+ separator as not belonging to any paragraph. */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
{
EMACS_INT bytepos = bidi_it->bytepos;
+ /* Special case for an empty buffer. */
+ if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
+ dir = L2R;
/* We should never be called at EOB or before BEGV. */
- if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
+ else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
abort ();
if (dir == L2R)
int ch, ch_len;
EMACS_INT pos;
bidi_type_t type;
- EMACS_INT sep_len;
/* If we are inside a paragraph separator, we are just waiting
for the separator to be exhausted; use the previous paragraph
- direction. */
- if (bidi_it->charpos < bidi_it->separator_limit)
+ direction. But don't do that if we have been just reseated,
+ because we need to reinitialize below in that case. */
+ if (!bidi_it->first_elt
+ && bidi_it->charpos < bidi_it->separator_limit)
return;
- /* If we are before another paragraph separator, continue
- through that with the previous paragraph direction. */
- sep_len = bidi_at_paragraph_end (bidi_it->charpos, bytepos);
- if (sep_len >= 0)
+ /* If we are on a newline, get past it to where the next
+ paragraph might start. But don't do that at BEGV since then
+ we are potentially in a new paragraph that doesn't yet
+ exist. */
+ pos = bidi_it->charpos;
+ if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
{
- bidi_it->separator_limit += sep_len + 1;
- return;
+ bytepos++;
+ pos++;
}
- else if (sep_len == -2)
- /* We are in the middle of a paragraph. Search back to where
- this paragraph starts. */
- bytepos = bidi_find_paragraph_start (bidi_it);
+
+ /* We are either at the beginning of a paragraph or in the
+ middle of it. Find where this paragraph starts. */
+ bytepos = bidi_find_paragraph_start (pos, bytepos);
/* We should always be at the beginning of a new line at this
point. */
- if (!(bytepos == BEGV_BYTE
- || FETCH_CHAR (bytepos) == '\n'
- || FETCH_CHAR (bytepos - 1) == '\n'))
+ if (!(bytepos == BEGV_BYTE || FETCH_CHAR (bytepos - 1) == '\n'))
abort ();
bidi_it->separator_limit = -1;
{
if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
break;
+ if (bytepos >= ZV_BYTE)
+ {
+ /* Pretend there's a paragraph separator at end of buffer. */
+ type = NEUTRAL_B;
+ break;
+ }
FETCH_CHAR_ADVANCE (ch, pos, bytepos);
}
if (type == STRONG_R || type == STRONG_AL) /* P3 */
else
abort ();
- /* Contrary to UAX#9 clause P3, we only default to L2R if we have no
- previous usable paragraph direction. */
+ /* Contrary to UAX#9 clause P3, we only default the paragraph
+ direction to L2R if we have no previous usable paragraph
+ direction. */
if (bidi_it->paragraph_dir == NEUTRAL_DIR)
- bidi_it->paragraph_dir = L2R; /* P3 */
+ bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
if (bidi_it->paragraph_dir == R2L)
- bidi_it->level_stack[0].level == 1;
+ bidi_it->level_stack[0].level = 1;
else
- bidi_it->level_stack[0].level == 0;
+ bidi_it->level_stack[0].level = 0;
bidi_line_init (bidi_it);
}
/* Do whatever UAX#9 clause X8 says should be done at paragraph's
end. */
-static inline void
+static INLINE void
bidi_set_paragraph_end (struct bidi_it *bidi_it)
{
bidi_it->invalid_levels = 0;
bidi_it->new_paragraph = 1;
bidi_it->separator_limit = -1;
bidi_it->type = NEUTRAL_B;
- bidi_it->type_after_w1 = UNKNOWN_BT;
- bidi_it->orig_type = UNKNOWN_BT;
+ bidi_it->type_after_w1 = NEUTRAL_B;
+ bidi_it->orig_type = NEUTRAL_B;
bidi_it->prev_was_pdf = 0;
- bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
+ bidi_it->prev.type = bidi_it->prev.type_after_w1 =
+ bidi_it->prev.orig_type = UNKNOWN_BT;
bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
bidi_it->last_strong.orig_type = UNKNOWN_BT;
bidi_it->next_for_neutral.charpos = -1;
/* Push the current embedding level and override status; reset the
current level to LEVEL and the current override status to OVERRIDE. */
-static inline void
+static INLINE void
bidi_push_embedding_level (struct bidi_it *bidi_it,
int level, bidi_dir_t override)
{
/* Pop the embedding level and directional override status from the
stack, and return the new level. */
-static inline int
+static INLINE int
bidi_pop_embedding_level (struct bidi_it *bidi_it)
{
/* UAX#9 says to ignore invalid PDFs. */
}
/* Record in SAVED_INFO the information about the current character. */
-static inline void
+static INLINE void
bidi_remember_char (struct bidi_saved_info *saved_info,
struct bidi_it *bidi_it)
{
/* Resolve the type of a neutral character according to the type of
surrounding strong text and the current embedding level. */
-static inline bidi_type_t
+static INLINE bidi_type_t
bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
{
/* N1: European and Arabic numbers are treated as though they were R. */
return STRONG_R;
}
-static inline int
+static INLINE int
bidi_explicit_dir_char (int c)
{
/* FIXME: this should be replaced with a lookup table with suitable
if (prev_level < new_level
&& bidi_it->type == WEAK_BN
&& bidi_it->ignore_bn_limit == 0 /* only if not already known */
- && bidi_it->ch != BIDI_EOB /* not already at EOB */
+ && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
&& bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
+ bidi_it->ch_len)))
{
if (bidi_it->type == NEUTRAL_B) /* X8 */
{
- /* End of buffer does _not_ indicate a new paragraph is coming.
- Otherwise, each character inserted at EOB will be processed
- as starting a new paragraph. */
- if (bidi_it->bytepos < ZV_BYTE)
- bidi_set_paragraph_end (bidi_it);
+ bidi_set_paragraph_end (bidi_it);
/* This is needed by bidi_resolve_weak below, and in L1. */
bidi_it->type_after_w1 = bidi_it->type;
bidi_check_type (bidi_it->type_after_w1);
/* Advance in the buffer, resolve weak types and return the type of
the next character after weak type resolution. */
-bidi_type_t
+static bidi_type_t
bidi_resolve_weak (struct bidi_it *bidi_it)
{
bidi_type_t type;
type = STRONG_R;
else if (override == L2R)
type = STRONG_L;
- else if (type == STRONG_AL)
- type = STRONG_R; /* W3 */
- else if (type == WEAK_NSM) /* W1 */
- {
- /* Note that we don't need to consider the case where the prev
- character has its type overridden by an RLO or LRO: such
- characters are outside the current level run, and thus not
- relevant to this NSM. Thus, NSM gets the orig_type of the
- previous character. */
- if (bidi_it->prev.type != UNKNOWN_BT)
- type = bidi_it->prev.orig_type;
- else if (bidi_it->sor == R2L)
- type = STRONG_R;
- else if (bidi_it->sor == L2R)
- type = STRONG_L;
- else /* shouldn't happen! */
- abort ();
- if (type == WEAK_EN /* W2 after W1 */
- && bidi_it->last_strong.type_after_w1 == STRONG_AL)
- type = WEAK_AN;
- }
- else if (type == WEAK_EN /* W2 */
- && bidi_it->last_strong.type_after_w1 == STRONG_AL)
- type = WEAK_AN;
- else if ((type == WEAK_ES
- && (bidi_it->prev.type_after_w1 == WEAK_EN /* W4 */
- && (bidi_it->prev.orig_type == WEAK_EN
- || bidi_it->prev.orig_type == WEAK_NSM))) /* aft W1 */
- || (type == WEAK_CS
- && ((bidi_it->prev.type_after_w1 == WEAK_EN
- && (bidi_it->prev.orig_type == WEAK_EN /* W4 */
- || bidi_it->prev.orig_type == WEAK_NSM)) /* a/W1 */
- || bidi_it->prev.type_after_w1 == WEAK_AN))) /* W4 */
+ else
{
- next_char =
- bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
- ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
- type_of_next = bidi_get_type (next_char, override);
-
- if (type_of_next == WEAK_BN
- || bidi_explicit_dir_char (next_char))
+ if (type == WEAK_NSM) /* W1 */
{
- bidi_copy_it (&saved_it, bidi_it);
- while (bidi_resolve_explicit (bidi_it) == new_level
- && bidi_it->type == WEAK_BN)
- ;
- type_of_next = bidi_it->type;
- bidi_copy_it (bidi_it, &saved_it);
- }
-
- /* If the next character is EN, but the last strong-type
- character is AL, that next EN will be changed to AN when we
- process it in W2 above. So in that case, this ES should not
- be changed into EN. */
- if (type == WEAK_ES
- && type_of_next == WEAK_EN
- && bidi_it->last_strong.type_after_w1 != STRONG_AL)
- type = WEAK_EN;
- else if (type == WEAK_CS)
- {
- if (bidi_it->prev.type_after_w1 == WEAK_AN
- && (type_of_next == WEAK_AN
- /* If the next character is EN, but the last
- strong-type character is AL, EN will be later
- changed to AN when we process it in W2 above. So
- in that case, this ES should not be changed into
- EN. */
- || (type_of_next == WEAK_EN
- && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
- type = WEAK_AN;
- else if (bidi_it->prev.type_after_w1 == WEAK_EN
- && type_of_next == WEAK_EN
- && bidi_it->last_strong.type_after_w1 != STRONG_AL)
- type = WEAK_EN;
+ /* Note that we don't need to consider the case where the
+ prev character has its type overridden by an RLO or LRO,
+ because then either the type of this NSM would have been
+ also overridden, or the previous character is outside the
+ current level run, and thus not relevant to this NSM.
+ This is why NSM gets the type_after_w1 of the previous
+ character. */
+ if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
+ /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
+ && bidi_it->prev.type_after_w1 != NEUTRAL_B)
+ type = bidi_it->prev.type_after_w1;
+ else if (bidi_it->sor == R2L)
+ type = STRONG_R;
+ else if (bidi_it->sor == L2R)
+ type = STRONG_L;
+ else /* shouldn't happen! */
+ abort ();
}
- }
- else if (type == WEAK_ET /* W5: ET with EN before or after it */
- || type == WEAK_BN) /* W5/Retaining */
- {
- if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN with EN before it */
- || bidi_it->next_en_pos > bidi_it->charpos)
- type = WEAK_EN;
- /* W5: ET with EN after it. */
- else
+ if (type == WEAK_EN /* W2 */
+ && bidi_it->last_strong.type_after_w1 == STRONG_AL)
+ type = WEAK_AN;
+ else if (type == STRONG_AL) /* W3 */
+ type = STRONG_R;
+ else if ((type == WEAK_ES /* W4 */
+ && bidi_it->prev.type_after_w1 == WEAK_EN
+ && bidi_it->prev.orig_type == WEAK_EN)
+ || (type == WEAK_CS
+ && ((bidi_it->prev.type_after_w1 == WEAK_EN
+ && bidi_it->prev.orig_type == WEAK_EN)
+ || bidi_it->prev.type_after_w1 == WEAK_AN)))
{
- EMACS_INT en_pos = bidi_it->charpos + 1;
-
next_char =
bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
type_of_next = bidi_get_type (next_char, override);
- if (type_of_next == WEAK_ET
- || type_of_next == WEAK_BN
+ if (type_of_next == WEAK_BN
|| bidi_explicit_dir_char (next_char))
{
bidi_copy_it (&saved_it, bidi_it);
while (bidi_resolve_explicit (bidi_it) == new_level
- && (bidi_it->type == WEAK_BN || bidi_it->type == WEAK_ET))
+ && bidi_it->type == WEAK_BN)
;
type_of_next = bidi_it->type;
- en_pos = bidi_it->charpos;
bidi_copy_it (bidi_it, &saved_it);
}
- if (type_of_next == WEAK_EN)
+
+ /* If the next character is EN, but the last strong-type
+ character is AL, that next EN will be changed to AN when
+ we process it in W2 above. So in that case, this ES
+ should not be changed into EN. */
+ if (type == WEAK_ES
+ && type_of_next == WEAK_EN
+ && bidi_it->last_strong.type_after_w1 != STRONG_AL)
+ type = WEAK_EN;
+ else if (type == WEAK_CS)
+ {
+ if (bidi_it->prev.type_after_w1 == WEAK_AN
+ && (type_of_next == WEAK_AN
+ /* If the next character is EN, but the last
+ strong-type character is AL, EN will be later
+ changed to AN when we process it in W2 above.
+ So in that case, this CS should also be
+ changed into AN. */
+ || (type_of_next == WEAK_EN
+ && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
+ type = WEAK_AN;
+ else if (bidi_it->prev.type_after_w1 == WEAK_EN
+ && type_of_next == WEAK_EN
+ && bidi_it->last_strong.type_after_w1 != STRONG_AL)
+ type = WEAK_EN;
+ }
+ }
+ else if (type == WEAK_ET /* W5: ET with EN before or after it */
+ || type == WEAK_BN) /* W5/Retaining */
+ {
+ if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
+ || bidi_it->next_en_pos > bidi_it->charpos)
+ type = WEAK_EN;
+ else /* W5: ET/BN with EN after it. */
{
- /* If the last strong character is AL, the EN we've
- found will become AN when we get to it (W2). */
- if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
+ EMACS_INT en_pos = bidi_it->charpos + 1;
+
+ next_char =
+ bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
+ ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+ type_of_next = bidi_get_type (next_char, override);
+
+ if (type_of_next == WEAK_ET
+ || type_of_next == WEAK_BN
+ || bidi_explicit_dir_char (next_char))
+ {
+ bidi_copy_it (&saved_it, bidi_it);
+ while (bidi_resolve_explicit (bidi_it) == new_level
+ && (bidi_it->type == WEAK_BN
+ || bidi_it->type == WEAK_ET))
+ ;
+ type_of_next = bidi_it->type;
+ en_pos = bidi_it->charpos;
+ bidi_copy_it (bidi_it, &saved_it);
+ }
+ if (type_of_next == WEAK_EN)
{
- type = WEAK_EN;
- /* Remember this EN position, to speed up processing
- of the next ETs. */
- bidi_it->next_en_pos = en_pos;
+ /* If the last strong character is AL, the EN we've
+ found will become AN when we get to it (W2). */
+ if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
+ {
+ type = WEAK_EN;
+ /* Remember this EN position, to speed up processing
+ of the next ETs. */
+ bidi_it->next_en_pos = en_pos;
+ }
+ else if (type == WEAK_BN)
+ type = NEUTRAL_ON; /* W6/Retaining */
}
- else if (type == WEAK_BN)
- type = NEUTRAL_ON; /* W6/Retaining */
}
}
}
return type;
}
-bidi_type_t
+static bidi_type_t
bidi_resolve_neutral (struct bidi_it *bidi_it)
{
int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
/* Given an iterator state in BIDI_IT, advance one character position
in the buffer to the next character (in the logical order), resolve
the bidi type of that next character, and return that type. */
-bidi_type_t
+static bidi_type_t
bidi_type_of_next_char (struct bidi_it *bidi_it)
{
bidi_type_t type;
the buffer to the next character (in the logical order), resolve
the embedding and implicit levels of that next character, and
return the resulting level. */
-int
+static int
bidi_level_of_next_char (struct bidi_it *bidi_it)
{
bidi_type_t type;
if (bidi_it->scan_dir == 1)
{
/* There's no sense in trying to advance if we hit end of text. */
- if (bidi_it->ch == BIDI_EOB)
+ if (bidi_it->bytepos >= ZV_BYTE)
return bidi_it->resolved_level;
/* Record the info about the previous character. */
old_level = bidi_it->resolved_level;
new_level = bidi_level_of_next_char (bidi_it);
- if (bidi_it->ch == BIDI_EOB)
- return;
/* Reordering of resolved levels (clause L2) is implemented by
jumping to the other edge of the level and flipping direction of
- scanning the buffer whenever we find a level change. */
+ scanning the text whenever we find a level change. */
if (new_level != old_level)
{
int ascending = new_level > old_level;
next_level = bidi_level_of_next_char (bidi_it);
}
- /* Take note when we are at the end of the paragraph. The next time
- we are about to be called, set_iterator_to_next will
- automatically reinit the paragraph direction, if needed. */
+ /* Take note when we have just processed the newline that precedes
+ the end of the paragraph. The next time we are about to be
+ called, set_iterator_to_next will automatically reinit the
+ paragraph direction, if needed. We do this at the newline before
+ the paragraph separator, because the next character might not be
+ the first character of the next paragraph, due to the bidi
+ reordering, whereas we _must_ know the paragraph base direction
+ _before_ we process the paragraph's text, since the base
+ direction affects the reordering. */
if (bidi_it->scan_dir == 1
&& bidi_it->orig_type == NEUTRAL_B
&& bidi_it->bytepos < ZV_BYTE)
if (sep_len >= 0)
{
bidi_it->new_paragraph = 1;
- /* Record the buffer position of the first character after
- the paragraph separator. */
- bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len + 1;
+ /* Record the buffer position of the last character of the
+ paragraph separator. */
+ bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
}
}