/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
- Copyright (C) 2000-2001, 2004-2005, 2009-2011
+ Copyright (C) 2000-2001, 2004-2005, 2009-2012
Free Software Foundation, Inc.
This file is part of GNU Emacs.
A note about references to UAX#9 rules: if the reference says
something like "X9/Retaining", it means that you need to refer to
- rule X9 and to its modifications decribed in the "Implementation
+ rule X9 and to its modifications described in the "Implementation
Notes" section of UAX#9, under "Retaining Format Codes". */
#include <config.h>
#include <setjmp.h>
#include "lisp.h"
-#include "buffer.h"
#include "character.h"
+#include "buffer.h"
#include "dispextern.h"
static int bidi_initialized = 0;
static inline void
bidi_check_type (bidi_type_t type)
{
- xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
+ eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
}
/* Given a bidi TYPE of a character, return its category. */
val = CHAR_TABLE_REF (bidi_mirror_table, c);
if (INTEGERP (val))
{
- int v = XINT (val);
+ int v;
+
+ /* When debugging, check before assigning to V, so that the check
+ isn't broken by undefined behavior due to int overflow. */
+ eassert (CHAR_VALID_P (XINT (val)));
+ v = XINT (val);
+
+ /* Minimal test we must do in optimized builds, to prevent weird
+ crashes further down the road. */
if (v < 0 || v > MAX_CHAR)
abort ();
static inline void
bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
{
- int higher_level = level_before > level_after ? level_before : level_after;
+ int higher_level = (level_before > level_after ? level_before : level_after);
/* The prev_was_pdf gork is required for when we have several PDFs
in a row. In that case, we want to compute the sor type for the
level to which we descend after processing all the PDFs. */
if (!bidi_it->prev_was_pdf || level_before < level_after)
/* FIXME: should the default sor direction be user selectable? */
- bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
+ bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
if (level_before > level_after)
bidi_it->prev_was_pdf = 1;
bidi_it->prev.type = UNKNOWN_BT;
- bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
- bidi_it->last_strong.orig_type = UNKNOWN_BT;
- bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
+ bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
+ = bidi_it->last_strong.orig_type = UNKNOWN_BT;
+ bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
- bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
- bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
+ bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
+ = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
}
int level, bidi_dir_t override)
{
bidi_it->stack_idx++;
- xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
+ eassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
bidi_it->level_stack[bidi_it->stack_idx].level = level;
bidi_it->level_stack[bidi_it->stack_idx].override = override;
}
/* Size of header used by bidi_shelve_cache. */
enum
{
- bidi_shelve_header_size =
- (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
- + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
- + sizeof (bidi_cache_last_idx))
+ bidi_shelve_header_size
+ = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
+ + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
+ + sizeof (bidi_cache_last_idx))
};
/* Reset the cache state to the empty state. We only reset the part
{
if (bidi_cache_size > BIDI_CACHE_CHUNK)
{
- bidi_cache =
- (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
+ bidi_cache
+ = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
bidi_cache_size = BIDI_CACHE_CHUNK;
}
bidi_cache_reset ();
resolved levels in cached states. DIR, if non-zero, means search
in that direction from the last cache hit. */
static inline ptrdiff_t
-bidi_cache_search (EMACS_INT charpos, int level, int dir)
+bidi_cache_search (ptrdiff_t charpos, int level, int dir)
{
ptrdiff_t i, i_start;
ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
int incr = before ? 1 : 0;
- xassert (!dir || bidi_cache_last_idx >= 0);
+ eassert (!dir || bidi_cache_last_idx >= 0);
if (!dir)
dir = -1;
{
/* The bidi cache cannot be larger than the largest Lisp string
or buffer. */
- ptrdiff_t string_or_buffer_bound =
- max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
+ ptrdiff_t string_or_buffer_bound
+ = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
/* Also, it cannot be larger than what C can represent. */
- ptrdiff_t c_bound =
- (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
+ ptrdiff_t c_bound
+ = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
- bidi_cache =
- xpalloc (bidi_cache, &bidi_cache_size,
- max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
- min (string_or_buffer_bound, c_bound), elsz);
+ bidi_cache
+ = xpalloc (bidi_cache, &bidi_cache_size,
+ max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
+ min (string_or_buffer_bound, c_bound), elsz);
}
}
}
static inline bidi_type_t
-bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
+bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
{
ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
bidi_copy_it (bidi_it, &bidi_cache[i]);
bidi_cache_last_idx = i;
- /* Don't let scan direction from from the cached state override
+ /* Don't let scan direction from the cached state override
the current scan direction. */
bidi_it->scan_dir = current_scan_dir;
return bidi_it->type;
memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
/* Push the current cache start onto the stack. */
- xassert (bidi_cache_sp < IT_STACK_SIZE);
+ eassert (bidi_cache_sp < IT_STACK_SIZE);
bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
/* Start a new level of cache, and make it empty. */
ptrdiff_t idx;
memcpy (&idx, p, sizeof (bidi_cache_idx));
- bidi_cache_total_alloc -=
- bidi_shelve_header_size + idx * sizeof (struct bidi_it);
+ bidi_cache_total_alloc
+ -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
}
else
{
+ sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
+ sizeof (bidi_cache_start),
sizeof (bidi_cache_last_idx));
- bidi_cache_total_alloc -=
- bidi_shelve_header_size + bidi_cache_idx * sizeof (struct bidi_it);
+ bidi_cache_total_alloc
+ -= (bidi_shelve_header_size
+ + bidi_cache_idx * sizeof (struct bidi_it));
}
xfree (p);
/* Initialize the bidi iterator from buffer/string position CHARPOS. */
void
-bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
+bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, int frame_window_p,
struct bidi_it *bidi_it)
{
if (! bidi_initialized)
bidi_it->type_after_w1 = NEUTRAL_B;
bidi_it->orig_type = NEUTRAL_B;
bidi_it->prev_was_pdf = 0;
- bidi_it->prev.type = bidi_it->prev.type_after_w1 =
- bidi_it->prev.orig_type = UNKNOWN_BT;
- bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
- bidi_it->last_strong.orig_type = UNKNOWN_BT;
+ bidi_it->prev.type = bidi_it->prev.type_after_w1
+ = bidi_it->prev.orig_type = UNKNOWN_BT;
+ bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
+ = bidi_it->last_strong.orig_type = UNKNOWN_BT;
bidi_it->next_for_neutral.charpos = -1;
- bidi_it->next_for_neutral.type =
- bidi_it->next_for_neutral.type_after_w1 =
- bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
+ bidi_it->next_for_neutral.type
+ = bidi_it->next_for_neutral.type_after_w1
+ = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->prev_for_neutral.charpos = -1;
- bidi_it->prev_for_neutral.type =
- bidi_it->prev_for_neutral.type_after_w1 =
- bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
+ bidi_it->prev_for_neutral.type
+ = bidi_it->prev_for_neutral.type_after_w1
+ = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
bidi_it->disp_pos = -1; /* invalid/unknown */
bidi_it->disp_prop = 0;
bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
bidi_it->invalid_levels = 0;
bidi_it->invalid_rl_levels = -1;
- bidi_it->next_en_pos = -1;
+ /* Setting this to zero will force its recomputation the first time
+ we need it for W5. */
+ bidi_it->next_en_pos = 0;
+ bidi_it->next_en_type = UNKNOWN_BT;
bidi_it->next_for_ws.type = UNKNOWN_BT;
bidi_set_sor_type (bidi_it,
- bidi_it->paragraph_dir == R2L ? 1 : 0,
+ (bidi_it->paragraph_dir == R2L ? 1 : 0),
bidi_it->level_stack[0].level); /* X10 */
bidi_cache_reset ();
are zero-based character positions in S, BEGBYTE is byte position
corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
string. */
-static inline EMACS_INT
-bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
- const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
+static inline ptrdiff_t
+bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
+ const ptrdiff_t begbyte, const ptrdiff_t end, int unibyte)
{
- EMACS_INT pos = beg;
+ ptrdiff_t pos = beg;
const unsigned char *p = s + begbyte, *start = p;
if (unibyte)
character from the current buffer. UNIBYTE non-zero means S is a
unibyte string. */
static inline int
-bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
+bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, int unibyte)
{
if (s)
{
string to iterate, or NULL if iterating over a buffer or a Lisp
string; in the latter case, STRING->lstring is the Lisp string. */
static inline int
-bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
+bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
int *disp_prop, struct bidi_string_data *string,
- int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
+ int frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
int ch;
- EMACS_INT endpos =
- (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
+ ptrdiff_t endpos
+ = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
struct text_pos pos;
+ int len;
/* If we got past the last known position of display string, compute
the position of the next one. That position could be at CHARPOS. */
}
else if (charpos >= *disp_pos && *disp_prop)
{
- EMACS_INT disp_end_pos;
+ ptrdiff_t disp_end_pos;
/* We don't expect to find ourselves in the middle of a display
property. Hopefully, it will never be needed. */
ch = 0xFFFC;
}
disp_end_pos = compute_display_string_end (*disp_pos, string);
+ if (disp_end_pos < 0)
+ {
+ /* Somebody removed the display string from the buffer
+ behind our back. Recover by processing this buffer
+ position as if no display property were present there to
+ begin with. */
+ *disp_prop = 0;
+ goto normal_char;
+ }
*nchars = disp_end_pos - *disp_pos;
if (*nchars <= 0)
abort ();
}
else
{
+ normal_char:
if (string->s)
{
- int len;
if (!string->unibyte)
{
}
else if (STRINGP (string->lstring))
{
- int len;
-
if (!string->unibyte)
{
ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
}
else
{
- ch = FETCH_MULTIBYTE_CHAR (bytepos);
- *ch_len = CHAR_BYTES (ch);
+ ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
+ *ch_len = len;
}
*nchars = 1;
}
following the buffer position, -1 if position is at the beginning
of a new paragraph, or -2 if position is neither at beginning nor
at end of a paragraph. */
-static EMACS_INT
-bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
+static ptrdiff_t
+bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
{
Lisp_Object sep_re;
Lisp_Object start_re;
- EMACS_INT val;
+ ptrdiff_t val;
sep_re = paragraph_separate_re;
start_re = paragraph_start_re;
Value is the byte position of the paragraph's beginning, or
BEGV_BYTE if paragraph_start_re is still not found after looking
back MAX_PARAGRAPH_SEARCH lines in the buffer. */
-static EMACS_INT
-bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
+static ptrdiff_t
+bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
{
Lisp_Object re = paragraph_start_re;
- EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
- EMACS_INT n = 0;
+ ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
+ ptrdiff_t n = 0;
while (pos_byte > BEGV_BYTE
&& n++ < MAX_PARAGRAPH_SEARCH
Note that this function gives the paragraph separator the same
direction as the preceding paragraph, even though Emacs generally
- views the separartor as not belonging to any paragraph. */
+ views the separator as not belonging to any paragraph. */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
{
- EMACS_INT bytepos = bidi_it->bytepos;
+ ptrdiff_t bytepos = bidi_it->bytepos;
int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
- EMACS_INT pstartbyte;
+ ptrdiff_t pstartbyte;
/* Note that begbyte is a byte position, while end is a character
position. Yes, this is ugly, but we are trying to avoid costly
calls to BYTE_TO_CHAR and its ilk. */
- EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
- EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
+ ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
+ ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
/* Special case for an empty buffer. */
if (bytepos == begbyte && bidi_it->charpos == end)
else if (dir == NEUTRAL_DIR) /* P2 */
{
int ch;
- EMACS_INT ch_len, nchars;
- EMACS_INT pos, disp_pos = -1;
+ ptrdiff_t ch_len, nchars;
+ ptrdiff_t pos, disp_pos = -1;
int disp_prop = 0;
bidi_type_t type;
const unsigned char *s;
we are potentially in a new paragraph that doesn't yet
exist. */
pos = bidi_it->charpos;
- s = STRINGP (bidi_it->string.lstring) ?
- SDATA (bidi_it->string.lstring) : bidi_it->string.s;
+ s = (STRINGP (bidi_it->string.lstring)
+ ? SDATA (bidi_it->string.lstring)
+ : bidi_it->string.s);
if (bytepos > begbyte
&& bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
{
bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
else
{
- EMACS_INT prevpbyte = pstartbyte;
- EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
+ ptrdiff_t prevpbyte = pstartbyte;
+ ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
/* Find the beginning of the previous paragraph, if any. */
while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
bidi_it->first_elt = 0;
if (string_p)
{
- const unsigned char *p =
- STRINGP (bidi_it->string.lstring)
- ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
+ const unsigned char *p
+ = (STRINGP (bidi_it->string.lstring)
+ ? SDATA (bidi_it->string.lstring)
+ : bidi_it->string.s);
if (bidi_it->charpos < 0)
bidi_it->charpos = 0;
}
}
else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
- || bidi_it->next_en_pos > bidi_it->charpos)
+ || (bidi_it->next_en_pos > bidi_it->charpos
+ && bidi_it->next_en_type == WEAK_EN))
type = WEAK_EN;
break;
case LRE: /* X3 */
}
}
else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
- || bidi_it->next_en_pos > bidi_it->charpos)
+ || (bidi_it->next_en_pos > bidi_it->charpos
+ && bidi_it->next_en_type == WEAK_EN))
type = WEAK_EN;
break;
case PDF: /* X7 */
}
}
else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
- || bidi_it->next_en_pos > bidi_it->charpos)
+ || (bidi_it->next_en_pos > bidi_it->charpos
+ && bidi_it->next_en_type == WEAK_EN))
type = WEAK_EN;
break;
default:
{
int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
int new_level = bidi_resolve_explicit_1 (bidi_it);
- EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
- const unsigned char *s = STRINGP (bidi_it->string.lstring)
- ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
+ ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
+ const unsigned char *s
+ = (STRINGP (bidi_it->string.lstring)
+ ? SDATA (bidi_it->string.lstring)
+ : bidi_it->string.s);
if (prev_level < new_level
&& bidi_it->type == WEAK_BN
int next_char;
bidi_type_t type_of_next;
struct bidi_it saved_it;
- EMACS_INT eob =
- (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
- ? bidi_it->string.schars : ZV;
+ ptrdiff_t eob
+ = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
+ ? bidi_it->string.schars : ZV);
type = bidi_it->type;
override = bidi_it->level_stack[bidi_it->stack_idx].override;
&& bidi_it->prev.orig_type == WEAK_EN)
|| bidi_it->prev.type_after_w1 == WEAK_AN)))
{
- const unsigned char *s =
- STRINGP (bidi_it->string.lstring)
- ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
-
- next_char =
- bidi_it->charpos + bidi_it->nchars >= eob
- ? BIDI_EOB
- : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
- bidi_it->string.unibyte);
+ const unsigned char *s
+ = (STRINGP (bidi_it->string.lstring)
+ ? SDATA (bidi_it->string.lstring)
+ : bidi_it->string.s);
+
+ next_char = (bidi_it->charpos + bidi_it->nchars >= eob
+ ? BIDI_EOB
+ : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
+ s, bidi_it->string.unibyte));
type_of_next = bidi_get_type (next_char, override);
if (type_of_next == WEAK_BN
else if (type == WEAK_ET /* W5: ET with EN before or after it */
|| type == WEAK_BN) /* W5/Retaining */
{
- if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
- || bidi_it->next_en_pos > bidi_it->charpos)
+ if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
type = WEAK_EN;
- else /* W5: ET/BN with EN after it. */
+ else if (bidi_it->next_en_pos > bidi_it->charpos
+ && bidi_it->next_en_type != WEAK_BN)
{
- EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
- const unsigned char *s =
- STRINGP (bidi_it->string.lstring)
- ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
+ if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
+ type = WEAK_EN;
+ }
+ else if (bidi_it->next_en_pos >=0)
+ {
+ ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
+ const unsigned char *s = (STRINGP (bidi_it->string.lstring)
+ ? SDATA (bidi_it->string.lstring)
+ : bidi_it->string.s);
if (bidi_it->nchars <= 0)
abort ();
- next_char =
- bidi_it->charpos + bidi_it->nchars >= eob
- ? BIDI_EOB
- : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
- bidi_it->string.unibyte);
+ next_char
+ = (bidi_it->charpos + bidi_it->nchars >= eob
+ ? BIDI_EOB
+ : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
+ bidi_it->string.unibyte));
type_of_next = bidi_get_type (next_char, override);
if (type_of_next == WEAK_ET
en_pos = bidi_it->charpos;
bidi_copy_it (bidi_it, &saved_it);
}
+ /* Remember this position, to speed up processing of the
+ next ETs. */
+ bidi_it->next_en_pos = en_pos;
if (type_of_next == WEAK_EN)
{
/* If the last strong character is AL, the EN we've
found will become AN when we get to it (W2). */
- if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
- {
- type = WEAK_EN;
- /* Remember this EN position, to speed up processing
- of the next ETs. */
- bidi_it->next_en_pos = en_pos;
- }
+ if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
+ type_of_next = WEAK_AN;
else if (type == WEAK_BN)
type = NEUTRAL_ON; /* W6/Retaining */
+ else
+ type = WEAK_EN;
}
+ else if (type_of_next == NEUTRAL_B)
+ /* Record the fact that there are no more ENs from
+ here to the end of paragraph, to avoid entering the
+ loop above ever again in this paragraph. */
+ bidi_it->next_en_pos = -1;
+ /* Record the type of the character where we ended our search. */
+ bidi_it->next_en_type = type_of_next;
}
}
}
|| type == NEUTRAL_ON))
abort ();
- if (bidi_get_category (type) == NEUTRAL
+ if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
+ we are already at paragraph end. */
+ && bidi_get_category (type) == NEUTRAL)
|| (type == WEAK_BN && prev_level == current_level))
{
if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
bidi_it->next_for_neutral.type,
current_level);
+ /* The next two "else if" clauses are shortcuts for the
+ important special case when we have a long sequence of
+ neutral or WEAK_BN characters, such as whitespace or nulls or
+ other control characters, on the base embedding level of the
+ paragraph, and that sequence goes all the way to the end of
+ the paragraph and follows a character whose resolved
+ directionality is identical to the base embedding level.
+ (This is what happens in a buffer with plain L2R text that
+ happens to include long sequences of control characters.) By
+ virtue of N1, the result of examining this long sequence will
+ always be either STRONG_L or STRONG_R, depending on the base
+ embedding level. So we use this fact directly instead of
+ entering the expensive loop in the "else" clause. */
+ else if (current_level == 0
+ && bidi_it->prev_for_neutral.type == STRONG_L
+ && !bidi_explicit_dir_char (bidi_it->ch))
+ type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
+ STRONG_L, current_level);
+ else if (/* current level is 1 */
+ current_level == 1
+ /* base embedding level is also 1 */
+ && bidi_it->level_stack[0].level == 1
+ /* previous character is one of those considered R for
+ the purposes of N1 */
+ && (bidi_it->prev_for_neutral.type == STRONG_R
+ || bidi_it->prev_for_neutral.type == WEAK_EN
+ || bidi_it->prev_for_neutral.type == WEAK_AN)
+ && !bidi_explicit_dir_char (bidi_it->ch))
+ type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
+ STRONG_R, current_level);
else
{
/* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
&& bidi_get_category (type) != NEUTRAL)
/* This is all per level run, so stop when we
reach the end of this level run. */
- || bidi_it->level_stack[bidi_it->stack_idx].level !=
- current_level));
+ || (bidi_it->level_stack[bidi_it->stack_idx].level
+ != current_level)));
bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
case STRONG_L:
case STRONG_R:
case STRONG_AL:
+ /* Actually, STRONG_AL cannot happen here, because
+ bidi_resolve_weak converts it to STRONG_R, per W3. */
+ eassert (type != STRONG_AL);
next_type = type;
break;
case WEAK_EN:
/* N1: ``European and Arabic numbers are treated as
though they were R.'' */
next_type = STRONG_R;
- saved_it.next_for_neutral.type = STRONG_R;
break;
case WEAK_BN:
if (!bidi_explicit_dir_char (bidi_it->ch))
member. */
if (saved_it.type != WEAK_BN
|| bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
- {
- next_type = bidi_it->prev_for_neutral.type;
- saved_it.next_for_neutral.type = next_type;
- bidi_check_type (next_type);
- }
+ next_type = bidi_it->prev_for_neutral.type;
else
{
/* This is a BN which does not adjoin neutrals.
}
type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
next_type, current_level);
+ saved_it.next_for_neutral.type = next_type;
saved_it.type = type;
+ bidi_check_type (next_type);
bidi_check_type (type);
bidi_copy_it (bidi_it, &saved_it);
}
bidi_type_t type;
int level, prev_level = -1;
struct bidi_saved_info next_for_neutral;
- EMACS_INT next_char_pos = -2;
+ ptrdiff_t next_char_pos = -2;
if (bidi_it->scan_dir == 1)
{
- EMACS_INT eob =
- (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
- ? bidi_it->string.schars : ZV;
+ ptrdiff_t eob
+ = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
+ ? bidi_it->string.schars : ZV);
/* There's no sense in trying to advance if we hit end of text. */
if (bidi_it->charpos >= eob)
bidi_it->next_for_neutral.type = UNKNOWN_BT;
if (bidi_it->next_en_pos >= 0
&& bidi_it->charpos >= bidi_it->next_en_pos)
- bidi_it->next_en_pos = -1;
+ {
+ bidi_it->next_en_pos = 0;
+ bidi_it->next_en_type = UNKNOWN_BT;
+ }
if (bidi_it->next_for_ws.type != UNKNOWN_BT
&& bidi_it->charpos >= bidi_it->next_for_ws.charpos)
bidi_it->next_for_ws.type = UNKNOWN_BT;
UNKNOWN_BT. */
if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
{
- int bob =
- (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
-
+ int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
+ ? 0 : 1);
if (bidi_it->scan_dir > 0)
{
if (bidi_it->nchars <= 0)
&& bidi_it->next_for_ws.type == UNKNOWN_BT)
{
int ch;
- EMACS_INT clen = bidi_it->ch_len;
- EMACS_INT bpos = bidi_it->bytepos;
- EMACS_INT cpos = bidi_it->charpos;
- EMACS_INT disp_pos = bidi_it->disp_pos;
- EMACS_INT nc = bidi_it->nchars;
+ ptrdiff_t clen = bidi_it->ch_len;
+ ptrdiff_t bpos = bidi_it->bytepos;
+ ptrdiff_t cpos = bidi_it->charpos;
+ ptrdiff_t disp_pos = bidi_it->disp_pos;
+ ptrdiff_t nc = bidi_it->nchars;
struct bidi_string_data bs = bidi_it->string;
bidi_type_t chtype;
int fwp = bidi_it->frame_window_p;
do {
ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
fwp, &clen, &nc);
- if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
+ if (ch == '\n' || ch == BIDI_EOB)
chtype = NEUTRAL_B;
else
chtype = bidi_get_type (ch, NEUTRAL_DIR);
}
/* Resolve implicit levels, with a twist: PDFs get the embedding
- level of the enbedding they terminate. See below for the
+ level of the embedding they terminate. See below for the
reason. */
if (bidi_it->orig_type == PDF
/* Don't do this if this formatting code didn't change the
else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
|| bidi_it->orig_type == NEUTRAL_S
|| bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
- /* || bidi_it->ch == LINESEP_CHAR */
|| (bidi_it->orig_type == NEUTRAL_WS
&& (bidi_it->next_for_ws.type == NEUTRAL_B
|| bidi_it->next_for_ws.type == NEUTRAL_S)))
bidi_it->separator_limit = bidi_it->string.schars;
else if (bidi_it->bytepos < ZV_BYTE)
{
- EMACS_INT sep_len =
- bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
- bidi_it->bytepos + bidi_it->ch_len);
+ ptrdiff_t sep_len
+ = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
+ bidi_it->bytepos + bidi_it->ch_len);
if (bidi_it->nchars <= 0)
abort ();
if (sep_len >= 0)
bidi_it->new_paragraph = 1;
/* Record the buffer position of the last character of the
paragraph separator. */
- bidi_it->separator_limit =
- bidi_it->charpos + bidi_it->nchars + sep_len;
+ bidi_it->separator_limit
+ = bidi_it->charpos + bidi_it->nchars + sep_len;
}
}
}
fputs ("\n", stderr);
fputs ("pos ", stderr);
for (i = 0; i < bidi_cache_idx; i++)
- fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
+ fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
fputs ("\n", stderr);
}