src/bidi.c

   1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    The two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    If you want to understand the code, you will have to read it
  44    together with the relevant portions of UAX#9.  The comments include
  45    references to UAX#9 rules, for that very reason.
  46
  47    A note about references to UAX#9 rules: if the reference says
  48    something like "X9/Retaining", it means that you need to refer to
  49    rule X9 and to its modifications decribed in the "Implementation
  50    Notes" section of UAX#9, under "Retaining Format Codes".  */
  51
  52 #include <config.h>
  53 #include <stdio.h>
  54 #include <setjmp.h>
  55
  56 #include "lisp.h"
  57 #include "buffer.h"
  58 #include "character.h"
  59 #include "dispextern.h"
  60
  61 static int bidi_initialized = 0;
  62
  63 static Lisp_Object bidi_type_table, bidi_mirror_table;
  64
  65 /* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table.  */
  66 #define LRM_CHAR   0x200E
  67 #define RLM_CHAR   0x200F
  68 #define LRE_CHAR   0x202A
  69 #define RLE_CHAR   0x202B
  70 #define PDF_CHAR   0x202C
  71 #define LRO_CHAR   0x202D
  72 #define RLO_CHAR   0x202E
  73
  74 #define BIDI_EOB   -1
  75
  76 /* Local data structures.  (Look in dispextern.h for the rest.)  */
  77
  78 /* What we need to know about the current paragraph.  */
  79 struct bidi_paragraph_info {
  80   EMACS_INT start_bytepos;      /* byte position where it begins */
  81   EMACS_INT end_bytepos;        /* byte position where it ends */
  82   int       embedding_level;    /* its basic embedding level */
  83   bidi_dir_t base_dir;          /* its base direction */
  84 };
  85
  86 /* Data type for describing the bidirectional character categories.  */
  87 typedef enum {
  88   UNKNOWN_BC,
  89   NEUTRAL,
  90   WEAK,
  91   STRONG
  92 } bidi_category_t;
  93
  94 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  95
  96 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  97 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  98
  99 static void
 100 bidi_initialize (void)
 101 {
 102
 103 #include "biditype.h"
 104 #include "bidimirror.h"
 105
 106   int i;
 107
 108   bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
 109   staticpro (&bidi_type_table);
 110
 111   for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
 112     char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
 113                           make_number (bidi_type[i].type));
 114
 115   bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
 116   staticpro (&bidi_mirror_table);
 117
 118   for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
 119     char_table_set (bidi_mirror_table, bidi_mirror[i].from,
 120                     make_number (bidi_mirror[i].to));
 121
 122   Qparagraph_start = intern ("paragraph-start");
 123   staticpro (&Qparagraph_start);
 124   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 125   if (!STRINGP (paragraph_start_re))
 126     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 127   staticpro (&paragraph_start_re);
 128   Qparagraph_separate = intern ("paragraph-separate");
 129   staticpro (&Qparagraph_separate);
 130   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 131   if (!STRINGP (paragraph_separate_re))
 132     paragraph_separate_re = build_string ("[ \t\f]*$");
 133   staticpro (&paragraph_separate_re);
 134   bidi_initialized = 1;
 135 }
 136
 137 /* Return the bidi type of a character CH, subject to the current
 138    directional OVERRIDE.  */
 139 static INLINE bidi_type_t
 140 bidi_get_type (int ch, bidi_dir_t override)
 141 {
 142   bidi_type_t default_type;
 143
 144   if (ch == BIDI_EOB)
 145     return NEUTRAL_B;
 146   if (ch < 0 || ch > MAX_CHAR)
 147     abort ();
 148
 149   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 150
 151   if (override == NEUTRAL_DIR)
 152     return default_type;
 153
 154   switch (default_type)
 155     {
 156       /* Although UAX#9 does not tell, it doesn't make sense to
 157          override NEUTRAL_B and LRM/RLM characters.  */
 158       case NEUTRAL_B:
 159       case LRE:
 160       case LRO:
 161       case RLE:
 162       case RLO:
 163       case PDF:
 164         return default_type;
 165       default:
 166         switch (ch)
 167           {
 168             case LRM_CHAR:
 169             case RLM_CHAR:
 170               return default_type;
 171             default:
 172               if (override == L2R) /* X6 */
 173                 return STRONG_L;
 174               else if (override == R2L)
 175                 return STRONG_R;
 176               else
 177                 abort ();       /* can't happen: handled above */
 178           }
 179     }
 180 }
 181
 182 static void
 183 bidi_check_type (bidi_type_t type)
 184 {
 185   if (type < UNKNOWN_BT || type > NEUTRAL_ON)
 186     abort ();
 187 }
 188
 189 /* Given a bidi TYPE of a character, return its category.  */
 190 static INLINE bidi_category_t
 191 bidi_get_category (bidi_type_t type)
 192 {
 193   switch (type)
 194     {
 195       case UNKNOWN_BT:
 196         return UNKNOWN_BC;
 197       case STRONG_L:
 198       case STRONG_R:
 199       case STRONG_AL:
 200       case LRE:
 201       case LRO:
 202       case RLE:
 203       case RLO:
 204         return STRONG;
 205       case PDF:         /* ??? really?? */
 206       case WEAK_EN:
 207       case WEAK_ES:
 208       case WEAK_ET:
 209       case WEAK_AN:
 210       case WEAK_CS:
 211       case WEAK_NSM:
 212       case WEAK_BN:
 213         return WEAK;
 214       case NEUTRAL_B:
 215       case NEUTRAL_S:
 216       case NEUTRAL_WS:
 217       case NEUTRAL_ON:
 218         return NEUTRAL;
 219       default:
 220         abort ();
 221     }
 222 }
 223
 224 /* Return the mirrored character of C, if it has one.  If C has no
 225    mirrored counterpart, return C.
 226    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 227    context must be tested by the caller.  */
 228 int
 229 bidi_mirror_char (int c)
 230 {
 231   Lisp_Object val;
 232
 233   if (c == BIDI_EOB)
 234     return c;
 235   if (c < 0 || c > MAX_CHAR)
 236     abort ();
 237
 238   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 239   if (INTEGERP (val))
 240     {
 241       int v = XINT (val);
 242
 243       if (v < 0 || v > MAX_CHAR)
 244         abort ();
 245
 246       return v;
 247     }
 248
 249   return c;
 250 }
 251
 252 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 253    copies the part of the level stack that is actually in use.  */
 254 static INLINE void
 255 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 256 {
 257   int i;
 258
 259   /* Copy everything except the level stack and beyond.  */
 260   memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
 261
 262   /* Copy the active part of the level stack.  */
 263   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 264   for (i = 1; i <= from->stack_idx; i++)
 265     to->level_stack[i] = from->level_stack[i];
 266 }
 267
 268 /* Caching the bidi iterator states.  */
 269
 270 #define BIDI_CACHE_CHUNK 200
 271 static struct bidi_it *bidi_cache;
 272 static size_t bidi_cache_size = 0;
 273 static size_t elsz = sizeof (struct bidi_it);
 274 static int bidi_cache_idx;      /* next unused cache slot */
 275 static int bidi_cache_last_idx; /* slot of last cache hit */
 276
 277 static INLINE void
 278 bidi_cache_reset (void)
 279 {
 280   bidi_cache_idx = 0;
 281   bidi_cache_last_idx = -1;
 282 }
 283
 284 static INLINE void
 285 bidi_cache_shrink (void)
 286 {
 287   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 288     {
 289       bidi_cache_size = BIDI_CACHE_CHUNK;
 290       bidi_cache =
 291         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 292     }
 293   bidi_cache_reset ();
 294 }
 295
 296 static INLINE void
 297 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
 298 {
 299   int current_scan_dir = bidi_it->scan_dir;
 300
 301   if (idx < 0 || idx >= bidi_cache_idx)
 302     abort ();
 303
 304   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 305   bidi_it->scan_dir = current_scan_dir;
 306   bidi_cache_last_idx = idx;
 307 }
 308
 309 /* Find a cached state with a given CHARPOS and resolved embedding
 310    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 311    resolved levels in cached states.  DIR, if non-zero, means search
 312    in that direction from the last cache hit.  */
 313 static INLINE int
 314 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 315 {
 316   int i, i_start;
 317
 318   if (bidi_cache_idx)
 319     {
 320       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 321         dir = -1;
 322       else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
 323         dir = 1;
 324       if (dir)
 325         i_start = bidi_cache_last_idx;
 326       else
 327         {
 328           dir = -1;
 329           i_start = bidi_cache_idx - 1;
 330         }
 331
 332       if (dir < 0)
 333         {
 334           /* Linear search for now; FIXME!  */
 335           for (i = i_start; i >= 0; i--)
 336             if (bidi_cache[i].charpos == charpos
 337                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 338               return i;
 339         }
 340       else
 341         {
 342           for (i = i_start; i < bidi_cache_idx; i++)
 343             if (bidi_cache[i].charpos == charpos
 344                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 345               return i;
 346         }
 347     }
 348
 349   return -1;
 350 }
 351
 352 /* Find a cached state where the resolved level changes to a value
 353    that is lower than LEVEL, and return its cache slot index.  DIR is
 354    the direction to search, starting with the last used cache slot.
 355    BEFORE, if non-zero, means return the index of the slot that is
 356    ``before'' the level change in the search direction.  That is,
 357    given the cached levels like this:
 358
 359          1122333442211
 360           AB        C
 361
 362    and assuming we are at the position cached at the slot marked with
 363    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 364    index of slot B or A, depending whether BEFORE is, respectively,
 365    non-zero or zero.  */
 366 static int
 367 bidi_cache_find_level_change (int level, int dir, int before)
 368 {
 369   if (bidi_cache_idx)
 370     {
 371       int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 372       int incr = before ? 1 : 0;
 373
 374       if (!dir)
 375         dir = -1;
 376       else if (!incr)
 377         i += dir;
 378
 379       if (dir < 0)
 380         {
 381           while (i >= incr)
 382             {
 383               if (bidi_cache[i - incr].resolved_level >= 0
 384                   && bidi_cache[i - incr].resolved_level < level)
 385                 return i;
 386               i--;
 387             }
 388         }
 389       else
 390         {
 391           while (i < bidi_cache_idx - incr)
 392             {
 393               if (bidi_cache[i + incr].resolved_level >= 0
 394                   && bidi_cache[i + incr].resolved_level < level)
 395                 return i;
 396               i++;
 397             }
 398         }
 399     }
 400
 401   return -1;
 402 }
 403
 404 static INLINE void
 405 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 406 {
 407   int idx;
 408
 409   /* We should never cache on backward scans.  */
 410   if (bidi_it->scan_dir == -1)
 411     abort ();
 412   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 413
 414   if (idx < 0)
 415     {
 416       idx = bidi_cache_idx;
 417       /* Enlarge the cache as needed.  */
 418       if (idx >= bidi_cache_size)
 419         {
 420           bidi_cache_size += BIDI_CACHE_CHUNK;
 421           bidi_cache =
 422             (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 423         }
 424       /* Character positions should correspond to cache positions 1:1.
 425          If we are outside the range of cached positions, the cache is
 426          useless and must be reset.  */
 427       if (idx > 0 &&
 428           (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
 429            || bidi_it->charpos < bidi_cache[0].charpos))
 430         {
 431           bidi_cache_reset ();
 432           idx = 0;
 433         }
 434       bidi_copy_it (&bidi_cache[idx], bidi_it);
 435       if (!resolved)
 436         bidi_cache[idx].resolved_level = -1;
 437     }
 438   else
 439     {
 440       /* Copy only the members which could have changed, to avoid
 441          costly copying of the entire struct.  */
 442       bidi_cache[idx].type = bidi_it->type;
 443       bidi_check_type (bidi_it->type);
 444       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 445       bidi_check_type (bidi_it->type_after_w1);
 446       if (resolved)
 447         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 448       else
 449         bidi_cache[idx].resolved_level = -1;
 450       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 451       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 452       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 453       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 454       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 455     }
 456
 457   bidi_cache_last_idx = idx;
 458   if (idx >= bidi_cache_idx)
 459     bidi_cache_idx = idx + 1;
 460 }
 461
 462 static INLINE bidi_type_t
 463 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 464 {
 465   int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 466
 467   if (i >= 0)
 468     {
 469       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 470
 471       bidi_copy_it (bidi_it, &bidi_cache[i]);
 472       bidi_cache_last_idx = i;
 473       /* Don't let scan direction from from the cached state override
 474          the current scan direction.  */
 475       bidi_it->scan_dir = current_scan_dir;
 476       return bidi_it->type;
 477     }
 478
 479   return UNKNOWN_BT;
 480 }
 481
 482 static INLINE int
 483 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 484 {
 485   if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
 486     abort ();
 487   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 488 }
 489
 490 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
 491    Value is the non-negative length of the paragraph separator
 492    following the buffer position, -1 if position is at the beginning
 493    of a new paragraph, or -2 if position is neither at beginning nor
 494    at end of a paragraph.  */
 495 static EMACS_INT
 496 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
 497 {
 498   Lisp_Object sep_re;
 499   Lisp_Object start_re;
 500   EMACS_INT val;
 501
 502   sep_re = paragraph_separate_re;
 503   start_re = paragraph_start_re;
 504
 505   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
 506   if (val < 0)
 507     {
 508       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
 509         val = -1;
 510       else
 511         val = -2;
 512     }
 513
 514   return val;
 515 }
 516
 517 /* Determine the start-of-run (sor) directional type given the two
 518    embedding levels on either side of the run boundary.  Also, update
 519    the saved info about previously seen characters, since that info is
 520    generally valid for a single level run.  */
 521 static INLINE void
 522 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 523 {
 524   int higher_level = level_before > level_after ? level_before : level_after;
 525
 526   /* The prev_was_pdf gork is required for when we have several PDFs
 527      in a row.  In that case, we want to compute the sor type for the
 528      next level run only once: when we see the first PDF.  That's
 529      because the sor type depends only on the higher of the two levels
 530      that we find on the two sides of the level boundary (see UAX#9,
 531      clause X10), and so we don't need to know the final embedding
 532      level to which we descend after processing all the PDFs.  */
 533   if (!bidi_it->prev_was_pdf || level_before < level_after)
 534     /* FIXME: should the default sor direction be user selectable?  */
 535     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 536   if (level_before > level_after)
 537     bidi_it->prev_was_pdf = 1;
 538
 539   bidi_it->prev.type = UNKNOWN_BT;
 540   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 541     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 542   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 543   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 544   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 545   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 546     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 547   bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
 548 }
 549
 550 static void
 551 bidi_line_init (struct bidi_it *bidi_it)
 552 {
 553   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 554   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 555   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 556   bidi_it->invalid_levels = 0;
 557   bidi_it->invalid_rl_levels = -1;
 558   bidi_it->next_en_pos = -1;
 559   bidi_it->next_for_ws.type = UNKNOWN_BT;
 560   bidi_set_sor_type (bidi_it,
 561                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 562                      bidi_it->level_stack[0].level); /* X10 */
 563
 564   bidi_cache_reset ();
 565 }
 566
 567 /* Find the beginning of this paragraph by looking back in the buffer.
 568    Value is the byte position of the paragraph's beginning.  */
 569 static EMACS_INT
 570 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
 571 {
 572   Lisp_Object re = paragraph_start_re;
 573   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
 574
 575   while (pos_byte > BEGV_BYTE
 576          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
 577     {
 578       pos = find_next_newline_no_quit (pos - 1, -1);
 579       pos_byte = CHAR_TO_BYTE (pos);
 580     }
 581   return pos_byte;
 582 }
 583
 584 /* Determine the base direction, a.k.a. base embedding level, of the
 585    paragraph we are about to iterate through.  If DIR is either L2R or
 586    R2L, just use that.  Otherwise, determine the paragraph direction
 587    from the first strong directional character of the paragraph.
 588
 589    NO_DEFAULT_P non-nil means don't default to L2R if the paragraph
 590    has no strong directional characters and both DIR and
 591    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
 592    in the buffer until a paragraph is found with a strong character,
 593    or until hitting BEGV.  In the latter case, fall back to L2R.  This
 594    flag is used in current-bidi-paragraph-direction.
 595
 596    Note that this function gives the paragraph separator the same
 597    direction as the preceding paragraph, even though Emacs generally
 598    views the separartor as not belonging to any paragraph.  */
 599 void
 600 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 601 {
 602   EMACS_INT bytepos = bidi_it->bytepos;
 603   EMACS_INT pstartbyte;
 604
 605   /* Special case for an empty buffer. */
 606   if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
 607     dir = L2R;
 608   /* We should never be called at EOB or before BEGV.  */
 609   else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
 610     abort ();
 611
 612   if (dir == L2R)
 613     {
 614       bidi_it->paragraph_dir = L2R;
 615       bidi_it->new_paragraph = 0;
 616     }
 617   else if (dir == R2L)
 618     {
 619       bidi_it->paragraph_dir = R2L;
 620       bidi_it->new_paragraph = 0;
 621     }
 622   else if (dir == NEUTRAL_DIR)  /* P2 */
 623     {
 624       int ch, ch_len;
 625       EMACS_INT pos;
 626       bidi_type_t type;
 627
 628       if (!bidi_initialized)
 629         bidi_initialize ();
 630
 631       /* If we are inside a paragraph separator, we are just waiting
 632          for the separator to be exhausted; use the previous paragraph
 633          direction.  But don't do that if we have been just reseated,
 634          because we need to reinitialize below in that case.  */
 635       if (!bidi_it->first_elt
 636           && bidi_it->charpos < bidi_it->separator_limit)
 637         return;
 638
 639       /* If we are on a newline, get past it to where the next
 640          paragraph might start.  But don't do that at BEGV since then
 641          we are potentially in a new paragraph that doesn't yet
 642          exist.  */
 643       pos = bidi_it->charpos;
 644       if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
 645         {
 646           bytepos++;
 647           pos++;
 648         }
 649
 650       /* We are either at the beginning of a paragraph or in the
 651          middle of it.  Find where this paragraph starts.  */
 652       pstartbyte = bidi_find_paragraph_start (pos, bytepos);
 653       bidi_it->separator_limit = -1;
 654       bidi_it->new_paragraph = 0;
 655
 656       /* The following loop is run more than once only if NO_DEFAULT_P
 657          is non-zero.  */
 658       do {
 659         bytepos = pstartbyte;
 660         ch = FETCH_CHAR (bytepos);
 661         ch_len = CHAR_BYTES (ch);
 662         pos = BYTE_TO_CHAR (bytepos);
 663         type = bidi_get_type (ch, NEUTRAL_DIR);
 664
 665         for (pos++, bytepos += ch_len;
 666              /* NOTE: UAX#9 says to search only for L, AL, or R types
 667                 of characters, and ignore RLE, RLO, LRE, and LRO.
 668                 However, I'm not sure it makes sense to omit those 4;
 669                 should try with and without that to see the effect.  */
 670              (bidi_get_category (type) != STRONG)
 671                || (bidi_ignore_explicit_marks_for_paragraph_level
 672                    && (type == RLE || type == RLO
 673                        || type == LRE || type == LRO));
 674              type = bidi_get_type (ch, NEUTRAL_DIR))
 675           {
 676             if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
 677               break;
 678             if (bytepos >= ZV_BYTE)
 679               {
 680                 /* Pretend there's a paragraph separator at end of
 681                    buffer.  */
 682                 type = NEUTRAL_B;
 683                 break;
 684               }
 685             FETCH_CHAR_ADVANCE (ch, pos, bytepos);
 686           }
 687         if (type == STRONG_R || type == STRONG_AL) /* P3 */
 688           bidi_it->paragraph_dir = R2L;
 689         else if (type == STRONG_L)
 690           bidi_it->paragraph_dir = L2R;
 691         if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
 692           {
 693             /* If this paragraph is at BEGV, default to L2R.  */
 694             if (pstartbyte == BEGV_BYTE)
 695               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
 696             else
 697               {
 698                 EMACS_INT prevpbyte = pstartbyte;
 699                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
 700
 701                 /* Find the beginning of the previous paragraph, if any.  */
 702                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 703                   {
 704                     p--;
 705                     pbyte = CHAR_TO_BYTE (p);
 706                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
 707                   }
 708                 pstartbyte = prevpbyte;
 709               }
 710           }
 711       } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
 712     }
 713   else
 714     abort ();
 715
 716   /* Contrary to UAX#9 clause P3, we only default the paragraph
 717      direction to L2R if we have no previous usable paragraph
 718      direction.  This is allowed by the HL1 clause.  */
 719   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
 720     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
 721   if (bidi_it->paragraph_dir == R2L)
 722     bidi_it->level_stack[0].level = 1;
 723   else
 724     bidi_it->level_stack[0].level = 0;
 725
 726   bidi_line_init (bidi_it);
 727 }
 728
 729 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 730    end.  */
 731 static INLINE void
 732 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 733 {
 734   bidi_it->invalid_levels = 0;
 735   bidi_it->invalid_rl_levels = -1;
 736   bidi_it->stack_idx = 0;
 737   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 738 }
 739
 740 /* Initialize the bidi iterator from buffer position CHARPOS.  */
 741 void
 742 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
 743 {
 744   if (! bidi_initialized)
 745     bidi_initialize ();
 746   bidi_it->charpos = charpos;
 747   bidi_it->bytepos = bytepos;
 748   bidi_it->first_elt = 1;
 749   bidi_set_paragraph_end (bidi_it);
 750   bidi_it->new_paragraph = 1;
 751   bidi_it->separator_limit = -1;
 752   bidi_it->type = NEUTRAL_B;
 753   bidi_it->type_after_w1 = NEUTRAL_B;
 754   bidi_it->orig_type = NEUTRAL_B;
 755   bidi_it->prev_was_pdf = 0;
 756   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 757     bidi_it->prev.orig_type = UNKNOWN_BT;
 758   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 759     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 760   bidi_it->next_for_neutral.charpos = -1;
 761   bidi_it->next_for_neutral.type =
 762     bidi_it->next_for_neutral.type_after_w1 =
 763     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 764   bidi_it->prev_for_neutral.charpos = -1;
 765   bidi_it->prev_for_neutral.type =
 766     bidi_it->prev_for_neutral.type_after_w1 =
 767     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 768   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 769   bidi_cache_shrink ();
 770 }
 771
 772 /* Push the current embedding level and override status; reset the
 773    current level to LEVEL and the current override status to OVERRIDE.  */
 774 static INLINE void
 775 bidi_push_embedding_level (struct bidi_it *bidi_it,
 776                            int level, bidi_dir_t override)
 777 {
 778   bidi_it->stack_idx++;
 779   if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
 780     abort ();
 781   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 782   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 783 }
 784
 785 /* Pop the embedding level and directional override status from the
 786    stack, and return the new level.  */
 787 static INLINE int
 788 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 789 {
 790   /* UAX#9 says to ignore invalid PDFs.  */
 791   if (bidi_it->stack_idx > 0)
 792     bidi_it->stack_idx--;
 793   return bidi_it->level_stack[bidi_it->stack_idx].level;
 794 }
 795
 796 /* Record in SAVED_INFO the information about the current character.  */
 797 static INLINE void
 798 bidi_remember_char (struct bidi_saved_info *saved_info,
 799                     struct bidi_it *bidi_it)
 800 {
 801   saved_info->charpos = bidi_it->charpos;
 802   saved_info->bytepos = bidi_it->bytepos;
 803   saved_info->type = bidi_it->type;
 804   bidi_check_type (bidi_it->type);
 805   saved_info->type_after_w1 = bidi_it->type_after_w1;
 806   bidi_check_type (bidi_it->type_after_w1);
 807   saved_info->orig_type = bidi_it->orig_type;
 808   bidi_check_type (bidi_it->orig_type);
 809 }
 810
 811 /* Resolve the type of a neutral character according to the type of
 812    surrounding strong text and the current embedding level.  */
 813 static INLINE bidi_type_t
 814 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
 815 {
 816   /* N1: European and Arabic numbers are treated as though they were R.  */
 817   if (next_type == WEAK_EN || next_type == WEAK_AN)
 818     next_type = STRONG_R;
 819   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
 820     prev_type = STRONG_R;
 821
 822   if (next_type == prev_type)   /* N1 */
 823     return next_type;
 824   else if ((lev & 1) == 0)      /* N2 */
 825     return STRONG_L;
 826   else
 827     return STRONG_R;
 828 }
 829
 830 static INLINE int
 831 bidi_explicit_dir_char (int c)
 832 {
 833   /* FIXME: this should be replaced with a lookup table with suitable
 834      bits set, like standard C ctype macros do.  */
 835   return (c == LRE_CHAR || c == LRO_CHAR
 836           || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
 837 }
 838
 839 /* A helper function for bidi_resolve_explicit.  It advances to the
 840    next character in logical order and determines the new embedding
 841    level and directional override, but does not take into account
 842    empty embeddings.  */
 843 static int
 844 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 845 {
 846   int curchar;
 847   bidi_type_t type;
 848   int current_level;
 849   int new_level;
 850   bidi_dir_t override;
 851
 852   if (bidi_it->bytepos < BEGV_BYTE      /* after reseat to BEGV? */
 853       || bidi_it->first_elt)
 854     {
 855       bidi_it->first_elt = 0;
 856       if (bidi_it->charpos < BEGV)
 857         bidi_it->charpos = BEGV;
 858       bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
 859     }
 860   else if (bidi_it->bytepos < ZV_BYTE)  /* don't move at ZV */
 861     {
 862       bidi_it->charpos++;
 863       if (bidi_it->ch_len == 0)
 864         abort ();
 865       bidi_it->bytepos += bidi_it->ch_len;
 866     }
 867
 868   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
 869   override = bidi_it->level_stack[bidi_it->stack_idx].override;
 870   new_level = current_level;
 871
 872   /* in case it is a unibyte character (not yet implemented) */
 873   /* _fetch_multibyte_char_len = 1; */
 874   if (bidi_it->bytepos >= ZV_BYTE)
 875     {
 876       curchar = BIDI_EOB;
 877       bidi_it->ch_len = 1;
 878     }
 879   else
 880     {
 881       curchar = FETCH_CHAR (bidi_it->bytepos);
 882       bidi_it->ch_len = CHAR_BYTES (curchar);
 883     }
 884   bidi_it->ch = curchar;
 885
 886   /* Don't apply directional override here, as all the types we handle
 887      below will not be affected by the override anyway, and we need
 888      the original type unaltered.  The override will be applied in
 889      bidi_resolve_weak.  */
 890   type = bidi_get_type (curchar, NEUTRAL_DIR);
 891   bidi_it->orig_type = type;
 892   bidi_check_type (bidi_it->orig_type);
 893
 894   if (type != PDF)
 895     bidi_it->prev_was_pdf = 0;
 896
 897   bidi_it->type_after_w1 = UNKNOWN_BT;
 898
 899   switch (type)
 900     {
 901       case RLE: /* X2 */
 902       case RLO: /* X4 */
 903         bidi_it->type_after_w1 = type;
 904         bidi_check_type (bidi_it->type_after_w1);
 905         type = WEAK_BN; /* X9/Retaining */
 906         if (bidi_it->ignore_bn_limit <= 0)
 907           {
 908             if (current_level <= BIDI_MAXLEVEL - 4)
 909               {
 910                 /* Compute the least odd embedding level greater than
 911                    the current level.  */
 912                 new_level = ((current_level + 1) & ~1) + 1;
 913                 if (bidi_it->type_after_w1 == RLE)
 914                   override = NEUTRAL_DIR;
 915                 else
 916                   override = R2L;
 917                 if (current_level == BIDI_MAXLEVEL - 4)
 918                   bidi_it->invalid_rl_levels = 0;
 919                 bidi_push_embedding_level (bidi_it, new_level, override);
 920               }
 921             else
 922               {
 923                 bidi_it->invalid_levels++;
 924                 /* See the commentary about invalid_rl_levels below.  */
 925                 if (bidi_it->invalid_rl_levels < 0)
 926                   bidi_it->invalid_rl_levels = 0;
 927                 bidi_it->invalid_rl_levels++;
 928               }
 929           }
 930         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 931                  || bidi_it->next_en_pos > bidi_it->charpos)
 932           type = WEAK_EN;
 933         break;
 934       case LRE: /* X3 */
 935       case LRO: /* X5 */
 936         bidi_it->type_after_w1 = type;
 937         bidi_check_type (bidi_it->type_after_w1);
 938         type = WEAK_BN; /* X9/Retaining */
 939         if (bidi_it->ignore_bn_limit <= 0)
 940           {
 941             if (current_level <= BIDI_MAXLEVEL - 5)
 942               {
 943                 /* Compute the least even embedding level greater than
 944                    the current level.  */
 945                 new_level = ((current_level + 2) & ~1);
 946                 if (bidi_it->type_after_w1 == LRE)
 947                   override = NEUTRAL_DIR;
 948                 else
 949                   override = L2R;
 950                 bidi_push_embedding_level (bidi_it, new_level, override);
 951               }
 952             else
 953               {
 954                 bidi_it->invalid_levels++;
 955                 /* invalid_rl_levels counts invalid levels encountered
 956                    while the embedding level was already too high for
 957                    LRE/LRO, but not for RLE/RLO.  That is because
 958                    there may be exactly one PDF which we should not
 959                    ignore even though invalid_levels is non-zero.
 960                    invalid_rl_levels helps to know what PDF is
 961                    that.  */
 962                 if (bidi_it->invalid_rl_levels >= 0)
 963                   bidi_it->invalid_rl_levels++;
 964               }
 965           }
 966         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 967                  || bidi_it->next_en_pos > bidi_it->charpos)
 968           type = WEAK_EN;
 969         break;
 970       case PDF: /* X7 */
 971         bidi_it->type_after_w1 = type;
 972         bidi_check_type (bidi_it->type_after_w1);
 973         type = WEAK_BN; /* X9/Retaining */
 974         if (bidi_it->ignore_bn_limit <= 0)
 975           {
 976             if (!bidi_it->invalid_rl_levels)
 977               {
 978                 new_level = bidi_pop_embedding_level (bidi_it);
 979                 bidi_it->invalid_rl_levels = -1;
 980                 if (bidi_it->invalid_levels)
 981                   bidi_it->invalid_levels--;
 982                 /* else nothing: UAX#9 says to ignore invalid PDFs */
 983               }
 984             if (!bidi_it->invalid_levels)
 985               new_level = bidi_pop_embedding_level (bidi_it);
 986             else
 987               {
 988                 bidi_it->invalid_levels--;
 989                 bidi_it->invalid_rl_levels--;
 990               }
 991           }
 992         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 993                  || bidi_it->next_en_pos > bidi_it->charpos)
 994           type = WEAK_EN;
 995         break;
 996       default:
 997         /* Nothing.  */
 998         break;
 999     }
1000
1001   bidi_it->type = type;
1002   bidi_check_type (bidi_it->type);
1003
1004   return new_level;
1005 }
1006
1007 /* Given an iterator state in BIDI_IT, advance one character position
1008    in the buffer to the next character (in the logical order), resolve
1009    any explicit embeddings and directional overrides, and return the
1010    embedding level of the character after resolving explicit
1011    directives and ignoring empty embeddings.  */
1012 static int
1013 bidi_resolve_explicit (struct bidi_it *bidi_it)
1014 {
1015   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1016   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1017
1018   if (prev_level < new_level
1019       && bidi_it->type == WEAK_BN
1020       && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1021       && bidi_it->bytepos < ZV_BYTE    /* not already at EOB */
1022       && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1023                                              + bidi_it->ch_len)))
1024     {
1025       /* Avoid pushing and popping embedding levels if the level run
1026          is empty, as this breaks level runs where it shouldn't.
1027          UAX#9 removes all the explicit embedding and override codes,
1028          so empty embeddings disappear without a trace.  We need to
1029          behave as if we did the same.  */
1030       struct bidi_it saved_it;
1031       int level = prev_level;
1032
1033       bidi_copy_it (&saved_it, bidi_it);
1034
1035       while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1036                                                  + bidi_it->ch_len)))
1037         {
1038           level = bidi_resolve_explicit_1 (bidi_it);
1039         }
1040
1041       if (level == prev_level)  /* empty embedding */
1042         saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1043       else                      /* this embedding is non-empty */
1044         saved_it.ignore_bn_limit = -1;
1045
1046       bidi_copy_it (bidi_it, &saved_it);
1047       if (bidi_it->ignore_bn_limit > 0)
1048         {
1049           /* We pushed a level, but we shouldn't have.  Undo that. */
1050           if (!bidi_it->invalid_rl_levels)
1051             {
1052               new_level = bidi_pop_embedding_level (bidi_it);
1053               bidi_it->invalid_rl_levels = -1;
1054               if (bidi_it->invalid_levels)
1055                 bidi_it->invalid_levels--;
1056             }
1057           if (!bidi_it->invalid_levels)
1058             new_level = bidi_pop_embedding_level (bidi_it);
1059           else
1060             {
1061               bidi_it->invalid_levels--;
1062               bidi_it->invalid_rl_levels--;
1063             }
1064         }
1065     }
1066
1067   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1068     {
1069       bidi_set_paragraph_end (bidi_it);
1070       /* This is needed by bidi_resolve_weak below, and in L1.  */
1071       bidi_it->type_after_w1 = bidi_it->type;
1072       bidi_check_type (bidi_it->type_after_w1);
1073     }
1074
1075   return new_level;
1076 }
1077
1078 /* Advance in the buffer, resolve weak types and return the type of
1079    the next character after weak type resolution.  */
1080 static bidi_type_t
1081 bidi_resolve_weak (struct bidi_it *bidi_it)
1082 {
1083   bidi_type_t type;
1084   bidi_dir_t override;
1085   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1086   int new_level  = bidi_resolve_explicit (bidi_it);
1087   int next_char;
1088   bidi_type_t type_of_next;
1089   struct bidi_it saved_it;
1090
1091   type = bidi_it->type;
1092   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1093
1094   if (type == UNKNOWN_BT
1095       || type == LRE
1096       || type == LRO
1097       || type == RLE
1098       || type == RLO
1099       || type == PDF)
1100     abort ();
1101
1102   if (new_level != prev_level
1103       || bidi_it->type == NEUTRAL_B)
1104     {
1105       /* We've got a new embedding level run, compute the directional
1106          type of sor and initialize per-run variables (UAX#9, clause
1107          X10).  */
1108       bidi_set_sor_type (bidi_it, prev_level, new_level);
1109     }
1110   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1111            || type == WEAK_BN || type == STRONG_AL)
1112     bidi_it->type_after_w1 = type;      /* needed in L1 */
1113   bidi_check_type (bidi_it->type_after_w1);
1114
1115   /* Level and directional override status are already recorded in
1116      bidi_it, and do not need any change; see X6.  */
1117   if (override == R2L)          /* X6 */
1118     type = STRONG_R;
1119   else if (override == L2R)
1120     type = STRONG_L;
1121   else
1122     {
1123       if (type == WEAK_NSM)     /* W1 */
1124         {
1125           /* Note that we don't need to consider the case where the
1126              prev character has its type overridden by an RLO or LRO,
1127              because then either the type of this NSM would have been
1128              also overridden, or the previous character is outside the
1129              current level run, and thus not relevant to this NSM.
1130              This is why NSM gets the type_after_w1 of the previous
1131              character.  */
1132           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1133               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1134               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1135             type = bidi_it->prev.type_after_w1;
1136           else if (bidi_it->sor == R2L)
1137             type = STRONG_R;
1138           else if (bidi_it->sor == L2R)
1139             type = STRONG_L;
1140           else /* shouldn't happen! */
1141             abort ();
1142         }
1143       if (type == WEAK_EN       /* W2 */
1144           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1145         type = WEAK_AN;
1146       else if (type == STRONG_AL) /* W3 */
1147         type = STRONG_R;
1148       else if ((type == WEAK_ES /* W4 */
1149                 && bidi_it->prev.type_after_w1 == WEAK_EN
1150                 && bidi_it->prev.orig_type == WEAK_EN)
1151                || (type == WEAK_CS
1152                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1153                         && bidi_it->prev.orig_type == WEAK_EN)
1154                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1155         {
1156           next_char =
1157             bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1158             ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1159           type_of_next = bidi_get_type (next_char, override);
1160
1161           if (type_of_next == WEAK_BN
1162               || bidi_explicit_dir_char (next_char))
1163             {
1164               bidi_copy_it (&saved_it, bidi_it);
1165               while (bidi_resolve_explicit (bidi_it) == new_level
1166                      && bidi_it->type == WEAK_BN)
1167                 ;
1168               type_of_next = bidi_it->type;
1169               bidi_copy_it (bidi_it, &saved_it);
1170             }
1171
1172           /* If the next character is EN, but the last strong-type
1173              character is AL, that next EN will be changed to AN when
1174              we process it in W2 above.  So in that case, this ES
1175              should not be changed into EN.  */
1176           if (type == WEAK_ES
1177               && type_of_next == WEAK_EN
1178               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1179             type = WEAK_EN;
1180           else if (type == WEAK_CS)
1181             {
1182               if (bidi_it->prev.type_after_w1 == WEAK_AN
1183                   && (type_of_next == WEAK_AN
1184                       /* If the next character is EN, but the last
1185                          strong-type character is AL, EN will be later
1186                          changed to AN when we process it in W2 above.
1187                          So in that case, this ES should not be
1188                          changed into EN.  */
1189                       || (type_of_next == WEAK_EN
1190                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1191                 type = WEAK_AN;
1192               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1193                        && type_of_next == WEAK_EN
1194                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1195                 type = WEAK_EN;
1196             }
1197         }
1198       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1199                || type == WEAK_BN)      /* W5/Retaining */
1200         {
1201           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1202               || bidi_it->next_en_pos > bidi_it->charpos)
1203             type = WEAK_EN;
1204           else                  /* W5: ET/BN with EN after it.  */
1205             {
1206               EMACS_INT en_pos = bidi_it->charpos + 1;
1207
1208               next_char =
1209                 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1210                 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1211               type_of_next = bidi_get_type (next_char, override);
1212
1213               if (type_of_next == WEAK_ET
1214                   || type_of_next == WEAK_BN
1215                   || bidi_explicit_dir_char (next_char))
1216                 {
1217                   bidi_copy_it (&saved_it, bidi_it);
1218                   while (bidi_resolve_explicit (bidi_it) == new_level
1219                          && (bidi_it->type == WEAK_BN
1220                              || bidi_it->type == WEAK_ET))
1221                     ;
1222                   type_of_next = bidi_it->type;
1223                   en_pos = bidi_it->charpos;
1224                   bidi_copy_it (bidi_it, &saved_it);
1225                 }
1226               if (type_of_next == WEAK_EN)
1227                 {
1228                   /* If the last strong character is AL, the EN we've
1229                      found will become AN when we get to it (W2). */
1230                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1231                     {
1232                       type = WEAK_EN;
1233                       /* Remember this EN position, to speed up processing
1234                          of the next ETs.  */
1235                       bidi_it->next_en_pos = en_pos;
1236                     }
1237                   else if (type == WEAK_BN)
1238                     type = NEUTRAL_ON; /* W6/Retaining */
1239                 }
1240             }
1241         }
1242     }
1243
1244   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1245       || (type == WEAK_BN
1246           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1247               || bidi_it->prev.type_after_w1 == WEAK_ES
1248               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1249     type = NEUTRAL_ON;
1250
1251   /* Store the type we've got so far, before we clobber it with strong
1252      types in W7 and while resolving neutral types.  But leave alone
1253      the original types that were recorded above, because we will need
1254      them for the L1 clause.  */
1255   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1256     bidi_it->type_after_w1 = type;
1257   bidi_check_type (bidi_it->type_after_w1);
1258
1259   if (type == WEAK_EN)  /* W7 */
1260     {
1261       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1262           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1263         type = STRONG_L;
1264     }
1265
1266   bidi_it->type = type;
1267   bidi_check_type (bidi_it->type);
1268   return type;
1269 }
1270
1271 static bidi_type_t
1272 bidi_resolve_neutral (struct bidi_it *bidi_it)
1273 {
1274   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1275   bidi_type_t type = bidi_resolve_weak (bidi_it);
1276   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1277
1278   if (!(type == STRONG_R
1279         || type == STRONG_L
1280         || type == WEAK_BN
1281         || type == WEAK_EN
1282         || type == WEAK_AN
1283         || type == NEUTRAL_B
1284         || type == NEUTRAL_S
1285         || type == NEUTRAL_WS
1286         || type == NEUTRAL_ON))
1287     abort ();
1288
1289   if (bidi_get_category (type) == NEUTRAL
1290       || (type == WEAK_BN && prev_level == current_level))
1291     {
1292       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1293         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1294                                        bidi_it->next_for_neutral.type,
1295                                        current_level);
1296       else
1297         {
1298           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1299              the assumption of batch-style processing; see clauses W4,
1300              W5, and especially N1, which require to look far forward
1301              (as well as back) in the buffer.  May the fleas of a
1302              thousand camels infest the armpits of those who design
1303              supposedly general-purpose algorithms by looking at their
1304              own implementations, and fail to consider other possible
1305              implementations!  */
1306           struct bidi_it saved_it;
1307           bidi_type_t next_type;
1308
1309           if (bidi_it->scan_dir == -1)
1310             abort ();
1311
1312           bidi_copy_it (&saved_it, bidi_it);
1313           /* Scan the text forward until we find the first non-neutral
1314              character, and then use that to resolve the neutral we
1315              are dealing with now.  We also cache the scanned iterator
1316              states, to salvage some of the effort later.  */
1317           bidi_cache_iterator_state (bidi_it, 0);
1318           do {
1319             /* Record the info about the previous character, so that
1320                it will be cached below with this state.  */
1321             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1322                 && bidi_it->type != WEAK_BN)
1323               bidi_remember_char (&bidi_it->prev, bidi_it);
1324             type = bidi_resolve_weak (bidi_it);
1325             /* Paragraph separators have their levels fully resolved
1326                at this point, so cache them as resolved.  */
1327             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1328             /* FIXME: implement L1 here, by testing for a newline and
1329                resetting the level for any sequence of whitespace
1330                characters adjacent to it.  */
1331           } while (!(type == NEUTRAL_B
1332                      || (type != WEAK_BN
1333                          && bidi_get_category (type) != NEUTRAL)
1334                      /* This is all per level run, so stop when we
1335                         reach the end of this level run.  */
1336                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1337                      current_level));
1338
1339           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1340
1341           switch (type)
1342             {
1343               case STRONG_L:
1344               case STRONG_R:
1345               case STRONG_AL:
1346                 next_type = type;
1347                 break;
1348               case WEAK_EN:
1349               case WEAK_AN:
1350                 /* N1: ``European and Arabic numbers are treated as
1351                    though they were R.''  */
1352                 next_type = STRONG_R;
1353                 saved_it.next_for_neutral.type = STRONG_R;
1354                 break;
1355               case WEAK_BN:
1356                 if (!bidi_explicit_dir_char (bidi_it->ch))
1357                   abort ();             /* can't happen: BNs are skipped */
1358                 /* FALLTHROUGH */
1359               case NEUTRAL_B:
1360                 /* Marched all the way to the end of this level run.
1361                    We need to use the eor type, whose information is
1362                    stored by bidi_set_sor_type in the prev_for_neutral
1363                    member.  */
1364                 if (saved_it.type != WEAK_BN
1365                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1366                   {
1367                     next_type = bidi_it->prev_for_neutral.type;
1368                     saved_it.next_for_neutral.type = next_type;
1369                     bidi_check_type (next_type);
1370                   }
1371                 else
1372                   {
1373                     /* This is a BN which does not adjoin neutrals.
1374                        Leave its type alone.  */
1375                     bidi_copy_it (bidi_it, &saved_it);
1376                     return bidi_it->type;
1377                   }
1378                 break;
1379               default:
1380                 abort ();
1381             }
1382           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1383                                          next_type, current_level);
1384           saved_it.type = type;
1385           bidi_check_type (type);
1386           bidi_copy_it (bidi_it, &saved_it);
1387         }
1388     }
1389   return type;
1390 }
1391
1392 /* Given an iterator state in BIDI_IT, advance one character position
1393    in the buffer to the next character (in the logical order), resolve
1394    the bidi type of that next character, and return that type.  */
1395 static bidi_type_t
1396 bidi_type_of_next_char (struct bidi_it *bidi_it)
1397 {
1398   bidi_type_t type;
1399
1400   /* This should always be called during a forward scan.  */
1401   if (bidi_it->scan_dir != 1)
1402     abort ();
1403
1404   /* Reset the limit until which to ignore BNs if we step out of the
1405      area where we found only empty levels.  */
1406   if ((bidi_it->ignore_bn_limit > 0
1407        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1408       || (bidi_it->ignore_bn_limit == -1
1409           && !bidi_explicit_dir_char (bidi_it->ch)))
1410     bidi_it->ignore_bn_limit = 0;
1411
1412   type = bidi_resolve_neutral (bidi_it);
1413
1414   return type;
1415 }
1416
1417 /* Given an iterator state BIDI_IT, advance one character position in
1418    the buffer to the next character (in the logical order), resolve
1419    the embedding and implicit levels of that next character, and
1420    return the resulting level.  */
1421 static int
1422 bidi_level_of_next_char (struct bidi_it *bidi_it)
1423 {
1424   bidi_type_t type;
1425   int level, prev_level = -1;
1426   struct bidi_saved_info next_for_neutral;
1427
1428   if (bidi_it->scan_dir == 1)
1429     {
1430       /* There's no sense in trying to advance if we hit end of text.  */
1431       if (bidi_it->bytepos >= ZV_BYTE)
1432         return bidi_it->resolved_level;
1433
1434       /* Record the info about the previous character.  */
1435       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1436           && bidi_it->type != WEAK_BN)
1437         bidi_remember_char (&bidi_it->prev, bidi_it);
1438       if (bidi_it->type_after_w1 == STRONG_R
1439           || bidi_it->type_after_w1 == STRONG_L
1440           || bidi_it->type_after_w1 == STRONG_AL)
1441         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1442       /* FIXME: it sounds like we don't need both prev and
1443          prev_for_neutral members, but I'm leaving them both for now.  */
1444       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1445           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1446         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1447
1448       /* If we overstepped the characters used for resolving neutrals
1449          and whitespace, invalidate their info in the iterator.  */
1450       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1451         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1452       if (bidi_it->next_en_pos >= 0
1453           && bidi_it->charpos >= bidi_it->next_en_pos)
1454         bidi_it->next_en_pos = -1;
1455       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1456           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1457         bidi_it->next_for_ws.type = UNKNOWN_BT;
1458
1459       /* This must be taken before we fill the iterator with the info
1460          about the next char.  If we scan backwards, the iterator
1461          state must be already cached, so there's no need to know the
1462          embedding level of the previous character, since we will be
1463          returning to our caller shortly.  */
1464       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1465     }
1466   next_for_neutral = bidi_it->next_for_neutral;
1467
1468   /* Perhaps it is already cached.  */
1469   type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1470   if (type != UNKNOWN_BT)
1471     {
1472       /* Don't lose the information for resolving neutrals!  The
1473          cached states could have been cached before their
1474          next_for_neutral member was computed.  If we are on our way
1475          forward, we can simply take the info from the previous
1476          state.  */
1477       if (bidi_it->scan_dir == 1
1478           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1479         bidi_it->next_for_neutral = next_for_neutral;
1480
1481       /* If resolved_level is -1, it means this state was cached
1482          before it was completely resolved, so we cannot return
1483          it.  */
1484       if (bidi_it->resolved_level != -1)
1485         return bidi_it->resolved_level;
1486     }
1487   if (bidi_it->scan_dir == -1)
1488     /* If we are going backwards, the iterator state is already cached
1489        from previous scans, and should be fully resolved.  */
1490     abort ();
1491
1492   if (type == UNKNOWN_BT)
1493     type = bidi_type_of_next_char (bidi_it);
1494
1495   if (type == NEUTRAL_B)
1496     return bidi_it->resolved_level;
1497
1498   level = bidi_it->level_stack[bidi_it->stack_idx].level;
1499   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1500       || (type == WEAK_BN && prev_level == level))
1501     {
1502       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1503         abort ();
1504
1505       /* If the cached state shows a neutral character, it was not
1506          resolved by bidi_resolve_neutral, so do it now.  */
1507       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1508                                      bidi_it->next_for_neutral.type,
1509                                      level);
1510     }
1511
1512   if (!(type == STRONG_R
1513         || type == STRONG_L
1514         || type == WEAK_BN
1515         || type == WEAK_EN
1516         || type == WEAK_AN))
1517     abort ();
1518   bidi_it->type = type;
1519   bidi_check_type (bidi_it->type);
1520
1521   /* For L1 below, we need to know, for each WS character, whether
1522      it belongs to a sequence of WS characters preceding a newline
1523      or a TAB or a paragraph separator.  */
1524   if (bidi_it->orig_type == NEUTRAL_WS
1525       && bidi_it->next_for_ws.type == UNKNOWN_BT)
1526     {
1527       int ch;
1528       int clen = bidi_it->ch_len;
1529       EMACS_INT bpos = bidi_it->bytepos;
1530       EMACS_INT cpos = bidi_it->charpos;
1531       bidi_type_t chtype;
1532
1533       do {
1534         /*_fetch_multibyte_char_len = 1;*/
1535         ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
1536         bpos += clen;
1537         cpos++;
1538         clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1539         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1540           chtype = NEUTRAL_B;
1541         else
1542           chtype = bidi_get_type (ch, NEUTRAL_DIR);
1543       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1544                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1545       bidi_it->next_for_ws.type = chtype;
1546       bidi_check_type (bidi_it->next_for_ws.type);
1547       bidi_it->next_for_ws.charpos = cpos;
1548       bidi_it->next_for_ws.bytepos = bpos;
1549     }
1550
1551   /* Resolve implicit levels, with a twist: PDFs get the embedding
1552      level of the enbedding they terminate.  See below for the
1553      reason.  */
1554   if (bidi_it->orig_type == PDF
1555       /* Don't do this if this formatting code didn't change the
1556          embedding level due to invalid or empty embeddings.  */
1557       && prev_level != level)
1558     {
1559       /* Don't look in UAX#9 for the reason for this: it's our own
1560          private quirk.  The reason is that we want the formatting
1561          codes to be delivered so that they bracket the text of their
1562          embedding.  For example, given the text
1563
1564              {RLO}teST{PDF}
1565
1566          we want it to be displayed as
1567
1568              {PDF}STet{RLO}
1569
1570          not as
1571
1572              STet{RLO}{PDF}
1573
1574          which will result because we bump up the embedding level as
1575          soon as we see the RLO and pop it as soon as we see the PDF,
1576          so RLO itself has the same embedding level as "teST", and
1577          thus would be normally delivered last, just before the PDF.
1578          The switch below fiddles with the level of PDF so that this
1579          ugly side effect does not happen.
1580
1581          (This is, of course, only important if the formatting codes
1582          are actually displayed, but Emacs does need to display them
1583          if the user wants to.)  */
1584       level = prev_level;
1585     }
1586   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1587            || bidi_it->orig_type == NEUTRAL_S
1588            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1589            /* || bidi_it->ch == LINESEP_CHAR */
1590            || (bidi_it->orig_type == NEUTRAL_WS
1591                && (bidi_it->next_for_ws.type == NEUTRAL_B
1592                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
1593     level = bidi_it->level_stack[0].level;
1594   else if ((level & 1) == 0) /* I1 */
1595     {
1596       if (type == STRONG_R)
1597         level++;
1598       else if (type == WEAK_EN || type == WEAK_AN)
1599         level += 2;
1600     }
1601   else                  /* I2 */
1602     {
1603       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1604         level++;
1605     }
1606
1607   bidi_it->resolved_level = level;
1608   return level;
1609 }
1610
1611 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
1612    non-zero, we are at the end of a level, and we need to prepare to
1613    resume the scan of the lower level.
1614
1615    If this level's other edge is cached, we simply jump to it, filling
1616    the iterator structure with the iterator state on the other edge.
1617    Otherwise, we walk the buffer until we come back to the same level
1618    as LEVEL.
1619
1620    Note: we are not talking here about a ``level run'' in the UAX#9
1621    sense of the term, but rather about a ``level'' which includes
1622    all the levels higher than it.  In other words, given the levels
1623    like this:
1624
1625          11111112222222333333334443343222222111111112223322111
1626                 A      B                    C
1627
1628    and assuming we are at point A scanning left to right, this
1629    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1630    at point B.  */
1631 static void
1632 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1633 {
1634   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1635   int idx;
1636
1637   /* Try the cache first.  */
1638   if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1639     bidi_cache_fetch_state (idx, bidi_it);
1640   else
1641     {
1642       int new_level;
1643
1644       if (end_flag)
1645         abort (); /* if we are at end of level, its edges must be cached */
1646
1647       bidi_cache_iterator_state (bidi_it, 1);
1648       do {
1649         new_level = bidi_level_of_next_char (bidi_it);
1650         bidi_cache_iterator_state (bidi_it, 1);
1651       } while (new_level >= level);
1652     }
1653 }
1654
1655 void
1656 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1657 {
1658   int old_level, new_level, next_level;
1659   struct bidi_it sentinel;
1660
1661   if (bidi_it->scan_dir == 0)
1662     {
1663       bidi_it->scan_dir = 1;    /* default to logical order */
1664     }
1665
1666   /* If we just passed a newline, initialize for the next line.  */
1667   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1668     bidi_line_init (bidi_it);
1669
1670   /* Prepare the sentinel iterator state, and cache it.  When we bump
1671      into it, scanning backwards, we'll know that the last non-base
1672      level is exhausted.  */
1673   if (bidi_cache_idx == 0)
1674     {
1675       bidi_copy_it (&sentinel, bidi_it);
1676       if (bidi_it->first_elt)
1677         {
1678           sentinel.charpos--;   /* cached charpos needs to be monotonic */
1679           sentinel.bytepos--;
1680           sentinel.ch = '\n';   /* doesn't matter, but why not? */
1681           sentinel.ch_len = 1;
1682         }
1683       bidi_cache_iterator_state (&sentinel, 1);
1684     }
1685
1686   old_level = bidi_it->resolved_level;
1687   new_level = bidi_level_of_next_char (bidi_it);
1688
1689   /* Reordering of resolved levels (clause L2) is implemented by
1690      jumping to the other edge of the level and flipping direction of
1691      scanning the text whenever we find a level change.  */
1692   if (new_level != old_level)
1693     {
1694       int ascending = new_level > old_level;
1695       int level_to_search = ascending ? old_level + 1 : old_level;
1696       int incr = ascending ? 1 : -1;
1697       int expected_next_level = old_level + incr;
1698
1699       /* Jump (or walk) to the other edge of this level.  */
1700       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1701       /* Switch scan direction and peek at the next character in the
1702          new direction.  */
1703       bidi_it->scan_dir = -bidi_it->scan_dir;
1704
1705       /* The following loop handles the case where the resolved level
1706          jumps by more than one.  This is typical for numbers inside a
1707          run of text with left-to-right embedding direction, but can
1708          also happen in other situations.  In those cases the decision
1709          where to continue after a level change, and in what direction,
1710          is tricky.  For example, given a text like below:
1711
1712                   abcdefgh
1713                   11336622
1714
1715          (where the numbers below the text show the resolved levels),
1716          the result of reordering according to UAX#9 should be this:
1717
1718                   efdcghba
1719
1720          This is implemented by the loop below which flips direction
1721          and jumps to the other edge of the level each time it finds
1722          the new level not to be the expected one.  The expected level
1723          is always one more or one less than the previous one.  */
1724       next_level = bidi_peek_at_next_level (bidi_it);
1725       while (next_level != expected_next_level)
1726         {
1727           expected_next_level += incr;
1728           level_to_search += incr;
1729           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1730           bidi_it->scan_dir = -bidi_it->scan_dir;
1731           next_level = bidi_peek_at_next_level (bidi_it);
1732         }
1733
1734       /* Finally, deliver the next character in the new direction.  */
1735       next_level = bidi_level_of_next_char (bidi_it);
1736     }
1737
1738   /* Take note when we have just processed the newline that precedes
1739      the end of the paragraph.  The next time we are about to be
1740      called, set_iterator_to_next will automatically reinit the
1741      paragraph direction, if needed.  We do this at the newline before
1742      the paragraph separator, because the next character might not be
1743      the first character of the next paragraph, due to the bidi
1744      reordering, whereas we _must_ know the paragraph base direction
1745      _before_ we process the paragraph's text, since the base
1746      direction affects the reordering.  */
1747   if (bidi_it->scan_dir == 1
1748       && bidi_it->orig_type == NEUTRAL_B
1749       && bidi_it->bytepos < ZV_BYTE)
1750     {
1751       EMACS_INT sep_len =
1752         bidi_at_paragraph_end (bidi_it->charpos + 1,
1753                                bidi_it->bytepos + bidi_it->ch_len);
1754       if (sep_len >= 0)
1755         {
1756           bidi_it->new_paragraph = 1;
1757           /* Record the buffer position of the last character of the
1758              paragraph separator.  */
1759           bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
1760         }
1761     }
1762
1763   if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1764     {
1765       /* If we are at paragraph's base embedding level and beyond the
1766          last cached position, the cache's job is done and we can
1767          discard it.  */
1768       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1769           && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1770         bidi_cache_reset ();
1771         /* But as long as we are caching during forward scan, we must
1772            cache each state, or else the cache integrity will be
1773            compromised: it assumes cached states correspond to buffer
1774            positions 1:1.  */
1775       else
1776         bidi_cache_iterator_state (bidi_it, 1);
1777     }
1778 }
1779
1780 /* This is meant to be called from within the debugger, whenever you
1781    wish to examine the cache contents.  */
1782 void
1783 bidi_dump_cached_states (void)
1784 {
1785   int i;
1786   int ndigits = 1;
1787
1788   if (bidi_cache_idx == 0)
1789     {
1790       fprintf (stderr, "The cache is empty.\n");
1791       return;
1792     }
1793   fprintf (stderr, "Total of %d state%s in cache:\n",
1794            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1795
1796   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1797     ndigits++;
1798   fputs ("ch  ", stderr);
1799   for (i = 0; i < bidi_cache_idx; i++)
1800     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1801   fputs ("\n", stderr);
1802   fputs ("lvl ", stderr);
1803   for (i = 0; i < bidi_cache_idx; i++)
1804     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1805   fputs ("\n", stderr);
1806   fputs ("pos ", stderr);
1807   for (i = 0; i < bidi_cache_idx; i++)
1808     fprintf (stderr, "%*ld", ndigits, (long)bidi_cache[i].charpos);
1809   fputs ("\n", stderr);
1810 }