src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  /* U+200E LEFT-TO-RIGHT MARK and U+200F RIGHT-TO-LEFT MARK; these two
     characters are exempt from directional overrides in
     bidi_get_type.  */
  #define LRM_CHAR   0x200E
  #define RLM_CHAR   0x200F
  /* Pseudo-character: bidi_get_type reports it as NEUTRAL_B and
     bidi_mirror_char returns it unchanged.  */
  #define BIDI_EOB   (-1)
  73
  /* Coarse category of a bidirectional character type, as computed by
     bidi_get_category.  The numeric values are the ones the compiler
     assigned implicitly before; they are merely spelled out here.  */
  typedef enum
  {
    UNKNOWN_BC = 0,
    NEUTRAL = 1,
    WEAK = 2,
    STRONG = 3
  } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static inline bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 abort ();       /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static inline void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static inline bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       int v = XINT (val);
 208
 209       if (v < 0 || v > MAX_CHAR)
 210         abort ();
 211
 212       return v;
 213     }
 214
 215   return c;
 216 }
 217
 218 /* Determine the start-of-run (sor) directional type given the two
 219    embedding levels on either side of the run boundary.  Also, update
 220    the saved info about previously seen characters, since that info is
 221    generally valid for a single level run.  */
 222 static inline void
 223 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 224 {
 225   int higher_level = level_before > level_after ? level_before : level_after;
 226
 227   /* The prev_was_pdf gork is required for when we have several PDFs
 228      in a row.  In that case, we want to compute the sor type for the
 229      next level run only once: when we see the first PDF.  That's
 230      because the sor type depends only on the higher of the two levels
 231      that we find on the two sides of the level boundary (see UAX#9,
 232      clause X10), and so we don't need to know the final embedding
 233      level to which we descend after processing all the PDFs.  */
 234   if (!bidi_it->prev_was_pdf || level_before < level_after)
 235     /* FIXME: should the default sor direction be user selectable?  */
 236     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 237   if (level_before > level_after)
 238     bidi_it->prev_was_pdf = 1;
 239
 240   bidi_it->prev.type = UNKNOWN_BT;
 241   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 242     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 243   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 244   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 245   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 246   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 247     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 248   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 249 }
 250
 251 /* Push the current embedding level and override status; reset the
 252    current level to LEVEL and the current override status to OVERRIDE.  */
 253 static inline void
 254 bidi_push_embedding_level (struct bidi_it *bidi_it,
 255                            int level, bidi_dir_t override)
 256 {
 257   bidi_it->stack_idx++;
 258   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 259   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 260   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 261 }
 262
 263 /* Pop the embedding level and directional override status from the
 264    stack, and return the new level.  */
 265 static inline int
 266 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 267 {
 268   /* UAX#9 says to ignore invalid PDFs.  */
 269   if (bidi_it->stack_idx > 0)
 270     bidi_it->stack_idx--;
 271   return bidi_it->level_stack[bidi_it->stack_idx].level;
 272 }
 273
 274 /* Record in SAVED_INFO the information about the current character.  */
 275 static inline void
 276 bidi_remember_char (struct bidi_saved_info *saved_info,
 277                     struct bidi_it *bidi_it)
 278 {
 279   saved_info->charpos = bidi_it->charpos;
 280   saved_info->bytepos = bidi_it->bytepos;
 281   saved_info->type = bidi_it->type;
 282   bidi_check_type (bidi_it->type);
 283   saved_info->type_after_w1 = bidi_it->type_after_w1;
 284   bidi_check_type (bidi_it->type_after_w1);
 285   saved_info->orig_type = bidi_it->orig_type;
 286   bidi_check_type (bidi_it->orig_type);
 287 }
 288
 289 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 290    copies the part of the level stack that is actually in use.  */
 291 static inline void
 292 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 293 {
 294   int i;
 295
 296   /* Copy everything except the level stack and beyond.  */
 297   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 298
 299   /* Copy the active part of the level stack.  */
 300   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 301   for (i = 1; i <= from->stack_idx; i++)
 302     to->level_stack[i] = from->level_stack[i];
 303 }
 304
 305 \f
 306 /***********************************************************************
 307                         Caching the bidi iterator states
 308  ***********************************************************************/
 309
 310 #define BIDI_CACHE_CHUNK 200
 311 static struct bidi_it *bidi_cache;
 312 static ptrdiff_t bidi_cache_size = 0;
 313 enum { elsz = sizeof (struct bidi_it) };
 314 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 315 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 316 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 317                                            "stack" level */
 318
 319 /* 5-slot stack for saving the start of the previous level of the
 320    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 321    and we need the same size of our stack.  */
 322 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 323 static int bidi_cache_sp;
 324
 325 /* Size of header used by bidi_shelve_cache.  */
 326 enum
 327   {
 328     bidi_shelve_header_size =
 329       (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 330        + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 331        + sizeof (bidi_cache_last_idx))
 332   };
 333
 334 /* Reset the cache state to the empty state.  We only reset the part
 335    of the cache relevant to iteration of the current object.  Previous
 336    objects, which are pushed on the display iterator's stack, are left
 337    intact.  This is called when the cached information is no more
 338    useful for the current iteration, e.g. when we were reseated to a
 339    new position on the same object.  */
 340 static inline void
 341 bidi_cache_reset (void)
 342 {
 343   bidi_cache_idx = bidi_cache_start;
 344   bidi_cache_last_idx = -1;
 345 }
 346
 347 /* Shrink the cache to its minimal size.  Called when we init the bidi
 348    iterator for reordering a buffer or a string that does not come
 349    from display properties, because that means all the previously
 350    cached info is of no further use.  */
 351 static inline void
 352 bidi_cache_shrink (void)
 353 {
 354   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 355     {
 356       bidi_cache =
 357         (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 358       bidi_cache_size = BIDI_CACHE_CHUNK;
 359     }
 360   bidi_cache_reset ();
 361 }
 362
 363 static inline void
 364 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 365 {
 366   int current_scan_dir = bidi_it->scan_dir;
 367
 368   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 369     abort ();
 370
 371   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 372   bidi_it->scan_dir = current_scan_dir;
 373   bidi_cache_last_idx = idx;
 374 }
 375
 376 /* Find a cached state with a given CHARPOS and resolved embedding
 377    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 378    resolved levels in cached states.  DIR, if non-zero, means search
 379    in that direction from the last cache hit.  */
 380 static inline ptrdiff_t
 381 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 382 {
 383   ptrdiff_t i, i_start;
 384
 385   if (bidi_cache_idx > bidi_cache_start)
 386     {
 387       if (bidi_cache_last_idx == -1)
 388         bidi_cache_last_idx = bidi_cache_idx - 1;
 389       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 390         {
 391           dir = -1;
 392           i_start = bidi_cache_last_idx - 1;
 393         }
 394       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 395                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 396         {
 397           dir = 1;
 398           i_start = bidi_cache_last_idx + 1;
 399         }
 400       else if (dir)
 401         i_start = bidi_cache_last_idx;
 402       else
 403         {
 404           dir = -1;
 405           i_start = bidi_cache_idx - 1;
 406         }
 407
 408       if (dir < 0)
 409         {
 410           /* Linear search for now; FIXME!  */
 411           for (i = i_start; i >= bidi_cache_start; i--)
 412             if (bidi_cache[i].charpos <= charpos
 413                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 414                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 415               return i;
 416         }
 417       else
 418         {
 419           for (i = i_start; i < bidi_cache_idx; i++)
 420             if (bidi_cache[i].charpos <= charpos
 421                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 422                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 423               return i;
 424         }
 425     }
 426
 427   return -1;
 428 }
 429
 430 /* Find a cached state where the resolved level changes to a value
 431    that is lower than LEVEL, and return its cache slot index.  DIR is
 432    the direction to search, starting with the last used cache slot.
 433    If DIR is zero, we search backwards from the last occupied cache
 434    slot.  BEFORE, if non-zero, means return the index of the slot that
 435    is ``before'' the level change in the search direction.  That is,
 436    given the cached levels like this:
 437
 438          1122333442211
 439           AB        C
 440
 441    and assuming we are at the position cached at the slot marked with
 442    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 443    index of slot B or A, depending whether BEFORE is, respectively,
 444    non-zero or zero.  */
 445 static ptrdiff_t
 446 bidi_cache_find_level_change (int level, int dir, int before)
 447 {
 448   if (bidi_cache_idx)
 449     {
 450       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 451       int incr = before ? 1 : 0;
 452
 453       xassert (!dir || bidi_cache_last_idx >= 0);
 454
 455       if (!dir)
 456         dir = -1;
 457       else if (!incr)
 458         i += dir;
 459
 460       if (dir < 0)
 461         {
 462           while (i >= bidi_cache_start + incr)
 463             {
 464               if (bidi_cache[i - incr].resolved_level >= 0
 465                   && bidi_cache[i - incr].resolved_level < level)
 466                 return i;
 467               i--;
 468             }
 469         }
 470       else
 471         {
 472           while (i < bidi_cache_idx - incr)
 473             {
 474               if (bidi_cache[i + incr].resolved_level >= 0
 475                   && bidi_cache[i + incr].resolved_level < level)
 476                 return i;
 477               i++;
 478             }
 479         }
 480     }
 481
 482   return -1;
 483 }
 484
 485 static inline void
 486 bidi_cache_ensure_space (ptrdiff_t idx)
 487 {
 488   /* Enlarge the cache as needed.  */
 489   if (idx >= bidi_cache_size)
 490     {
 491       /* The bidi cache cannot be larger than the largest Lisp string
 492          or buffer.  */
 493       ptrdiff_t string_or_buffer_bound =
 494         max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 495
 496       /* Also, it cannot be larger than what C can represent.  */
 497       ptrdiff_t c_bound =
 498         (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 499
 500       bidi_cache =
 501         xpalloc (bidi_cache, &bidi_cache_size,
 502                  max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 503                  min (string_or_buffer_bound, c_bound), elsz);
 504     }
 505 }
 506
 507 static inline void
 508 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 509 {
 510   ptrdiff_t idx;
 511
 512   /* We should never cache on backward scans.  */
 513   if (bidi_it->scan_dir == -1)
 514     abort ();
 515   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 516
 517   if (idx < 0)
 518     {
 519       idx = bidi_cache_idx;
 520       bidi_cache_ensure_space (idx);
 521       /* Character positions should correspond to cache positions 1:1.
 522          If we are outside the range of cached positions, the cache is
 523          useless and must be reset.  */
 524       if (idx > bidi_cache_start &&
 525           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 526                                + bidi_cache[idx - 1].nchars)
 527            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 528         {
 529           bidi_cache_reset ();
 530           idx = bidi_cache_start;
 531         }
 532       if (bidi_it->nchars <= 0)
 533         abort ();
 534       bidi_copy_it (&bidi_cache[idx], bidi_it);
 535       if (!resolved)
 536         bidi_cache[idx].resolved_level = -1;
 537     }
 538   else
 539     {
 540       /* Copy only the members which could have changed, to avoid
 541          costly copying of the entire struct.  */
 542       bidi_cache[idx].type = bidi_it->type;
 543       bidi_check_type (bidi_it->type);
 544       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 545       bidi_check_type (bidi_it->type_after_w1);
 546       if (resolved)
 547         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 548       else
 549         bidi_cache[idx].resolved_level = -1;
 550       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 551       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 552       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 553       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 554       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 555       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 556       bidi_cache[idx].disp_prop_p = bidi_it->disp_prop_p;
 557     }
 558
 559   bidi_cache_last_idx = idx;
 560   if (idx >= bidi_cache_idx)
 561     bidi_cache_idx = idx + 1;
 562 }
 563
 564 static inline bidi_type_t
 565 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 566 {
 567   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 568
 569   if (i >= bidi_cache_start)
 570     {
 571       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 572
 573       bidi_copy_it (bidi_it, &bidi_cache[i]);
 574       bidi_cache_last_idx = i;
 575       /* Don't let scan direction from from the cached state override
 576          the current scan direction.  */
 577       bidi_it->scan_dir = current_scan_dir;
 578       return bidi_it->type;
 579     }
 580
 581   return UNKNOWN_BT;
 582 }
 583
 584 static inline int
 585 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 586 {
 587   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 588     abort ();
 589   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 590 }
 591
 592 \f
 593 /***********************************************************************
 594              Pushing and popping the bidi iterator state
 595  ***********************************************************************/
 596
 597 /* Push the bidi iterator state in preparation for reordering a
 598    different object, e.g. display string found at certain buffer
 599    position.  Pushing the bidi iterator boils down to saving its
 600    entire state on the cache and starting a new cache "stacked" on top
 601    of the current cache.  */
 602 void
 603 bidi_push_it (struct bidi_it *bidi_it)
 604 {
 605   /* Save the current iterator state in its entirety after the last
 606      used cache slot.  */
 607   bidi_cache_ensure_space (bidi_cache_idx);
 608   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 609
 610   /* Push the current cache start onto the stack.  */
 611   xassert (bidi_cache_sp < IT_STACK_SIZE);
 612   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 613
 614   /* Start a new level of cache, and make it empty.  */
 615   bidi_cache_start = bidi_cache_idx;
 616   bidi_cache_last_idx = -1;
 617 }
 618
 619 /* Restore the iterator state saved by bidi_push_it and return the
 620    cache to the corresponding state.  */
 621 void
 622 bidi_pop_it (struct bidi_it *bidi_it)
 623 {
 624   if (bidi_cache_start <= 0)
 625     abort ();
 626
 627   /* Reset the next free cache slot index to what it was before the
 628      call to bidi_push_it.  */
 629   bidi_cache_idx = bidi_cache_start - 1;
 630
 631   /* Restore the bidi iterator state saved in the cache.  */
 632   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 633
 634   /* Pop the previous cache start from the stack.  */
 635   if (bidi_cache_sp <= 0)
 636     abort ();
 637   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 638
 639   /* Invalidate the last-used cache slot data.  */
 640   bidi_cache_last_idx = -1;
 641 }
 642
 643 static ptrdiff_t bidi_cache_total_alloc;
 644
 645 /* Stash away a copy of the cache and its control variables.  */
 646 void *
 647 bidi_shelve_cache (void)
 648 {
 649   unsigned char *databuf;
 650   ptrdiff_t alloc;
 651
 652   /* Empty cache.  */
 653   if (bidi_cache_idx == 0)
 654     return NULL;
 655
 656   alloc = (bidi_shelve_header_size
 657            + bidi_cache_idx * sizeof (struct bidi_it));
 658   databuf = xmalloc (alloc);
 659   bidi_cache_total_alloc += alloc;
 660
 661   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 662   memcpy (databuf + sizeof (bidi_cache_idx),
 663           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 664   memcpy (databuf + sizeof (bidi_cache_idx)
 665           + bidi_cache_idx * sizeof (struct bidi_it),
 666           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 667   memcpy (databuf + sizeof (bidi_cache_idx)
 668           + bidi_cache_idx * sizeof (struct bidi_it)
 669           + sizeof (bidi_cache_start_stack),
 670           &bidi_cache_sp, sizeof (bidi_cache_sp));
 671   memcpy (databuf + sizeof (bidi_cache_idx)
 672           + bidi_cache_idx * sizeof (struct bidi_it)
 673           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 674           &bidi_cache_start, sizeof (bidi_cache_start));
 675   memcpy (databuf + sizeof (bidi_cache_idx)
 676           + bidi_cache_idx * sizeof (struct bidi_it)
 677           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 678           + sizeof (bidi_cache_start),
 679           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 680
 681   return databuf;
 682 }
 683
 684 /* Restore the cache state from a copy stashed away by
 685    bidi_shelve_cache, and free the buffer used to stash that copy.
 686    JUST_FREE non-zero means free the buffer, but don't restore the
 687    cache; used when the corresponding iterator is discarded instead of
 688    being restored.  */
 689 void
 690 bidi_unshelve_cache (void *databuf, int just_free)
 691 {
 692   unsigned char *p = databuf;
 693
 694   if (!p)
 695     {
 696       if (!just_free)
 697         {
 698           /* A NULL pointer means an empty cache.  */
 699           bidi_cache_start = 0;
 700           bidi_cache_sp = 0;
 701           bidi_cache_reset ();
 702         }
 703     }
 704   else
 705     {
 706       if (just_free)
 707         {
 708           ptrdiff_t idx;
 709
 710           memcpy (&idx, p, sizeof (bidi_cache_idx));
 711           bidi_cache_total_alloc -=
 712             bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 713         }
 714       else
 715         {
 716           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 717           bidi_cache_ensure_space (bidi_cache_idx);
 718           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 719                   bidi_cache_idx * sizeof (struct bidi_it));
 720           memcpy (bidi_cache_start_stack,
 721                   p + sizeof (bidi_cache_idx)
 722                   + bidi_cache_idx * sizeof (struct bidi_it),
 723                   sizeof (bidi_cache_start_stack));
 724           memcpy (&bidi_cache_sp,
 725                   p + sizeof (bidi_cache_idx)
 726                   + bidi_cache_idx * sizeof (struct bidi_it)
 727                   + sizeof (bidi_cache_start_stack),
 728                   sizeof (bidi_cache_sp));
 729           memcpy (&bidi_cache_start,
 730                   p + sizeof (bidi_cache_idx)
 731                   + bidi_cache_idx * sizeof (struct bidi_it)
 732                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 733                   sizeof (bidi_cache_start));
 734           memcpy (&bidi_cache_last_idx,
 735                   p + sizeof (bidi_cache_idx)
 736                   + bidi_cache_idx * sizeof (struct bidi_it)
 737                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 738                   + sizeof (bidi_cache_start),
 739                   sizeof (bidi_cache_last_idx));
 740           bidi_cache_total_alloc -=
 741             bidi_shelve_header_size + bidi_cache_idx * sizeof (struct bidi_it);
 742         }
 743
 744       xfree (p);
 745     }
 746 }
 747
 748 \f
 749 /***********************************************************************
 750                         Initialization
 751  ***********************************************************************/
 752 static void
 753 bidi_initialize (void)
 754 {
 755   bidi_type_table = uniprop_table (intern ("bidi-class"));
 756   if (NILP (bidi_type_table))
 757     abort ();
 758   staticpro (&bidi_type_table);
 759
 760   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 761   if (NILP (bidi_mirror_table))
 762     abort ();
 763   staticpro (&bidi_mirror_table);
 764
 765   Qparagraph_start = intern ("paragraph-start");
 766   staticpro (&Qparagraph_start);
 767   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 768   if (!STRINGP (paragraph_start_re))
 769     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 770   staticpro (&paragraph_start_re);
 771   Qparagraph_separate = intern ("paragraph-separate");
 772   staticpro (&Qparagraph_separate);
 773   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 774   if (!STRINGP (paragraph_separate_re))
 775     paragraph_separate_re = build_string ("[ \t\f]*$");
 776   staticpro (&paragraph_separate_re);
 777
 778   bidi_cache_sp = 0;
 779   bidi_cache_total_alloc = 0;
 780
 781   bidi_initialized = 1;
 782 }
 783
 784 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 785    end.  */
 786 static inline void
 787 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 788 {
 789   bidi_it->invalid_levels = 0;
 790   bidi_it->invalid_rl_levels = -1;
 791   bidi_it->stack_idx = 0;
 792   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 793 }
 794
 795 /* Initialize the bidi iterator BIDI_IT for iterating from buffer/string
        position CHARPOS/BYTEPOS.  A negative CHARPOS or BYTEPOS leaves
        the corresponding member of BIDI_IT unchanged.  FRAME_WINDOW_P
        is recorded in BIDI_IT; it is handed to bidi_fetch_char later,
        when characters are fetched.  This runs bidi_initialize first
        if it has not been run yet, puts the iterator into the "start
        of a new paragraph" state, and shrinks or resets the bidi
        cache.  */
 796 void
 797 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 798               struct bidi_it *bidi_it)
 799 {
 800   if (! bidi_initialized)
 801     bidi_initialize ();
 802   if (charpos >= 0)
 803     bidi_it->charpos = charpos;
 804   if (bytepos >= 0)
 805     bidi_it->bytepos = bytepos;
 806   bidi_it->frame_window_p = frame_window_p;
 807   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
       /* With first_elt set, the first call to bidi_resolve_explicit_1
          fetches the character at the current position instead of
          advancing past it.  */
 808   bidi_it->first_elt = 1;
 809   bidi_set_paragraph_end (bidi_it);
 810   bidi_it->new_paragraph = 1;
 811   bidi_it->separator_limit = -1;
 812   bidi_it->type = NEUTRAL_B;
 813   bidi_it->type_after_w1 = NEUTRAL_B;
 814   bidi_it->orig_type = NEUTRAL_B;
 815   bidi_it->prev_was_pdf = 0;
       /* No previous, last-strong or neutral-context characters are
          known yet.  */
 816   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 817     bidi_it->prev.orig_type = UNKNOWN_BT;
 818   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 819     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 820   bidi_it->next_for_neutral.charpos = -1;
 821   bidi_it->next_for_neutral.type =
 822     bidi_it->next_for_neutral.type_after_w1 =
 823     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 824   bidi_it->prev_for_neutral.charpos = -1;
 825   bidi_it->prev_for_neutral.type =
 826     bidi_it->prev_for_neutral.type_after_w1 =
 827     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 828   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 829   bidi_it->disp_pos = -1;       /* invalid/unknown */
 830   bidi_it->disp_prop_p = 0;
 831   /* We can only shrink the cache if we are at the bottom level of its
 832      "stack".  */
 833   if (bidi_cache_start == 0)
 834     bidi_cache_shrink ();
 835   else
 836     bidi_cache_reset ();
 837 }
 838
 839 /* Perform initializations for reordering a new line of bidi text.
        This expects BIDI_IT->paragraph_dir and
        BIDI_IT->level_stack[0].level to be set already (as
        bidi_paragraph_init does before calling this).  It restarts the
        per-line state from the level at the bottom of the level stack
        and resets the bidi cache.  */
 840 static void
 841 bidi_line_init (struct bidi_it *bidi_it)
 842 {
 843   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 844   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 845   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 846   bidi_it->invalid_levels = 0;
 847   bidi_it->invalid_rl_levels = -1;
 848   bidi_it->next_en_pos = -1;
 849   bidi_it->next_for_ws.type = UNKNOWN_BT;
       /* The level before the first run is taken from the paragraph
          direction: 1 for R2L, 0 otherwise.  */
 850   bidi_set_sor_type (bidi_it,
 851                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 852                      bidi_it->level_stack[0].level); /* X10 */
 853
 854   bidi_cache_reset ();
 855 }
 856
 857 \f
 858 /***********************************************************************
 859                         Fetching characters
 860  ***********************************************************************/
 861
 862 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 863    are zero-based character positions in S, BEGBYTE is the byte position
 864    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 865    string.  Value is the number of bytes from BEGBYTE up to the
        byte position that corresponds to END.  For a multibyte S,
        this aborts if CHAR_HEAD_P fails for the byte at BEGBYTE.  */
 866 static inline EMACS_INT
 867 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 868                   const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
 869 {
 870   EMACS_INT pos = beg;
 871   const unsigned char *p = s + begbyte, *start = p;
 872
       /* In a unibyte string character and byte positions coincide, so
          END is directly usable as a byte offset.  */
 873   if (unibyte)
 874     p = s + end;
 875   else
 876     {
 877       if (!CHAR_HEAD_P (*p))
 878         abort ();
 879
           /* Step over one whole character at a time until END.  */
 880       while (pos < end)
 881         {
 882           p += BYTES_BY_CHAR_HEAD (*p);
 883           pos++;
 884         }
 885     }
 886
 887   return p - start;
 888 }
 889
 890 /* Fetch and return the character at byte position BYTEPOS.  If S is
 891    non-NULL, fetch the character from string S; otherwise fetch the
 892    character from the current buffer.  UNIBYTE non-zero means S is a
 893    unibyte string, in which case the byte at BYTEPOS is returned
        as is.  UNIBYTE is not consulted when S is NULL.  */
 894 static inline int
 895 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
 896 {
 897   if (s)
 898     {
 899       if (unibyte)
 900         return s[bytepos];
 901       else
 902         return STRING_CHAR (s + bytepos);
 903     }
 904   else
 905     return FETCH_MULTIBYTE_CHAR (bytepos);
 906 }
 907
 908 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 909    character is covered by a display string, treat the entire run of
 910    covered characters as a single character u+FFFC, and return their
 911    combined length in CH_LEN and NCHARS.  DISP_POS specifies the
 912    character position of the next display string, or -1 if not yet
 913    computed.  DISP_PROP_P non-zero means that there's really a display
 914    string at DISP_POS, as opposed to when we searched till DISP_POS
 915    without finding one.  When the next character is at or beyond that
 916    position, the function updates DISP_POS with the position of the
 917    next display string.  STRING->s is the C string to iterate, or NULL
 918    if iterating over a buffer or a Lisp string; in the latter case,
 919    STRING->lstring is the Lisp string.

        On return, *CH_LEN is the length in bytes, and *NCHARS the
        length in characters, of what was fetched.  If CHARPOS is at
        or beyond the end of the string (STRING->schars) or of the
        buffer (ZV), the value is BIDI_EOB, both lengths are 1,
        *DISP_POS is set to that end position, and *DISP_PROP_P is
        reset to zero.  The function aborts if CHARPOS is strictly
        inside a run covered by a display string, or if such a run
        turns out to be empty.  */
 920 static inline int
 921 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 922                  int *disp_prop_p, struct bidi_string_data *string,
 923                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 924 {
 925   int ch;
       /* End of the text we iterate over, as a character position:
          the string's length for C and Lisp strings, ZV for a
          buffer.  */
 926   EMACS_INT endpos =
 927     (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 928   struct text_pos pos;
 929
 930   /* If we got past the last known position of display string, compute
 931      the position of the next one.  That position could be at CHARPOS.  */
 932   if (charpos < endpos && charpos > *disp_pos)
 933     {
 934       SET_TEXT_POS (pos, charpos, bytepos);
 935       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 936                                               disp_prop_p);
 937     }
 938
 939   /* Fetch the character at BYTEPOS.  */
 940   if (charpos >= endpos)
 941     {
 942       ch = BIDI_EOB;
 943       *ch_len = 1;
 944       *nchars = 1;
 945       *disp_pos = endpos;
 946       *disp_prop_p = 0;
 947     }
 948   else if (charpos >= *disp_pos && *disp_prop_p)
 949     {
 950       EMACS_INT disp_end_pos;
 951
 952       /* We don't expect to find ourselves in the middle of a display
 953          property.  Hopefully, it will never be needed.  */
 954       if (charpos > *disp_pos)
 955         abort ();
 956       /* Return the Unicode Object Replacement Character to represent
 957          the entire run of characters covered by the display string.  */
 958       ch = 0xFFFC;
 959       disp_end_pos = compute_display_string_end (*disp_pos, string);
 960       *nchars = disp_end_pos - *disp_pos;
 961       if (*nchars <= 0)
 962         abort ();
           /* The byte length of the covered run is counted in the C
              string, in the Lisp string's data, or via buffer
              positions, depending on what we iterate over.  */
 963       if (string->s)
 964         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 965                                     disp_end_pos, string->unibyte);
 966       else if (STRINGP (string->lstring))
 967         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 968                                     bytepos, disp_end_pos, string->unibyte);
 969       else
 970         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 971     }
 972   else
 973     {
           /* An ordinary character, not covered by a display string.  */
 974       if (string->s)
 975         {
 976           int len;
 977
 978           if (!string->unibyte)
 979             {
 980               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
 981               *ch_len = len;
 982             }
 983           else
 984             {
 985               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
 986               *ch_len = 1;
 987             }
 988         }
 989       else if (STRINGP (string->lstring))
 990         {
 991           int len;
 992
 993           if (!string->unibyte)
 994             {
 995               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
 996                                            len);
 997               *ch_len = len;
 998             }
 999           else
1000             {
1001               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1002               *ch_len = 1;
1003             }
1004         }
1005       else
1006         {
1007           ch = FETCH_MULTIBYTE_CHAR (bytepos);
1008           *ch_len = CHAR_BYTES (ch);
1009         }
1010       *nchars = 1;
1011     }
1012
1013   /* If we just entered a run of characters covered by a display
1014      string, compute the position of the next display string.  */
1015   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1016       && *disp_prop_p)
1017     {
1018       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1019       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1020                                               disp_prop_p);
1021     }
1022
1023   return ch;
1024 }
1025
1026 \f
1027 /***********************************************************************
1028                         Determining paragraph direction
1029  ***********************************************************************/
1030
1031 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1032    Value is the non-negative length of the paragraph separator
1033    following the buffer position, -1 if position is at the beginning
1034    of a new paragraph, or -2 if position is neither at beginning nor
1035    at end of a paragraph.  The separator is recognized by matching
        paragraph_separate_re at the position, and the beginning of a
        paragraph by matching paragraph_start_re there; both matches
        are limited to ZV/ZV_BYTE.  */
1036 static EMACS_INT
1037 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1038 {
1039   Lisp_Object sep_re;
1040   Lisp_Object start_re;
1041   EMACS_INT val;
1042
1043   sep_re = paragraph_separate_re;
1044   start_re = paragraph_start_re;
1045
1046   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1047   if (val < 0)
1048     {
           /* No separator here; tell a paragraph start apart from
              anything else.  */
1049       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1050         val = -1;
1051       else
1052         val = -2;
1053     }
1054
1055   return val;
1056 }
1057
1058 /* Find the beginning of this paragraph by looking back in the buffer.
1059    Value is the byte position of the paragraph's beginning.  POS and
        POS_BYTE are the character and byte positions where the search
        starts.  The search tries paragraph_start_re at the current
        position and, while it does not match, moves back to the
        position returned by find_next_newline_no_quit (POS - 1, -1);
        it stops at the first match or on reaching BEGV_BYTE.  */
1060 static EMACS_INT
1061 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
1062 {
1063   Lisp_Object re = paragraph_start_re;
1064   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1065
1066   while (pos_byte > BEGV_BYTE
1067          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1068     {
1069       /* FIXME: What if the paragraph beginning is covered by a
1070          display string?  And what if a display string covering some
1071          of the text over which we scan back includes
1072          paragraph_start_re?  */
1073       pos = find_next_newline_no_quit (pos - 1, -1);
1074       pos_byte = CHAR_TO_BYTE (pos);
1075     }
1076   return pos_byte;
1077 }
1078
1079 /* Determine the base direction, a.k.a. base embedding level, of the
1080    paragraph we are about to iterate through.  If DIR is either L2R or
1081    R2L, just use that.  Otherwise, determine the paragraph direction
1082    from the first strong directional character of the paragraph.
1083
1084    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1085    has no strong directional characters and both DIR and
1086    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
1087    in the buffer until a paragraph is found with a strong character,
1088    or until hitting BEGV.  In the latter case, fall back to L2R.  This
1089    flag is used in current-bidi-paragraph-direction.
1090
1091    Note that this function gives the paragraph separator the same
1092    direction as the preceding paragraph, even though Emacs generally
1093    views the separator as not belonging to any paragraph.

        On normal return, BIDI_IT->paragraph_dir is either L2R or R2L,
        BIDI_IT->level_stack[0].level is 0 or 1 accordingly, and
        bidi_line_init has been called.  The exception is a call with
        DIR == NEUTRAL_DIR while still inside a paragraph separator
        (and not just reseated): then the function returns at once
        and leaves the previous paragraph's direction in place.

        The function aborts if BIDI_IT is positioned at or after the
        end of the buffer/string or before its beginning (an empty
        buffer/string is accepted and forces L2R), or if DIR is not
        one of L2R, R2L and NEUTRAL_DIR.  */
1094 void
1095 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
1096 {
1097   EMACS_INT bytepos = bidi_it->bytepos;
1098   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1099   EMACS_INT pstartbyte;
1100   /* Note that begbyte is a byte position, while end is a character
1101      position.  Yes, this is ugly, but we are trying to avoid costly
1102      calls to BYTE_TO_CHAR and its ilk.  */
1103   EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
1104   EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
1105
1106   /* Special case for an empty buffer. */
1107   if (bytepos == begbyte && bidi_it->charpos == end)
1108     dir = L2R;
1109   /* We should never be called at EOB or before BEGV.  */
1110   else if (bidi_it->charpos >= end || bytepos < begbyte)
1111     abort ();
1112
1113   if (dir == L2R)
1114     {
1115       bidi_it->paragraph_dir = L2R;
1116       bidi_it->new_paragraph = 0;
1117     }
1118   else if (dir == R2L)
1119     {
1120       bidi_it->paragraph_dir = R2L;
1121       bidi_it->new_paragraph = 0;
1122     }
1123   else if (dir == NEUTRAL_DIR)  /* P2 */
1124     {
1125       int ch;
1126       EMACS_INT ch_len, nchars;
1127       EMACS_INT pos, disp_pos = -1;
1128       int disp_prop_p = 0;
1129       bidi_type_t type;
1130       const unsigned char *s;
1131
1132       if (!bidi_initialized)
1133         bidi_initialize ();
1134
1135       /* If we are inside a paragraph separator, we are just waiting
1136          for the separator to be exhausted; use the previous paragraph
1137          direction.  But don't do that if we have been just reseated,
1138          because we need to reinitialize below in that case.  */
1139       if (!bidi_it->first_elt
1140           && bidi_it->charpos < bidi_it->separator_limit)
1141         return;
1142
1143       /* If we are on a newline, get past it to where the next
1144          paragraph might start.  But don't do that at BEGV since then
1145          we are potentially in a new paragraph that doesn't yet
1146          exist.  */
1147       pos = bidi_it->charpos;
1148       s = STRINGP (bidi_it->string.lstring) ?
1149         SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1150       if (bytepos > begbyte
1151           && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1152         {
1153           bytepos++;
1154           pos++;
1155         }
1156
1157       /* We are either at the beginning of a paragraph or in the
1158          middle of it.  Find where this paragraph starts.  */
1159       if (string_p)
1160         {
1161           /* We don't support changes of paragraph direction inside a
1162              string.  It is treated as a single paragraph.  */
1163           pstartbyte = 0;
1164         }
1165       else
1166         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1167       bidi_it->separator_limit = -1;
1168       bidi_it->new_paragraph = 0;
1169
1170       /* The following loop is run more than once only if NO_DEFAULT_P
1171          is non-zero, and only if we are iterating on a buffer.  */
1172       do {
1173         bytepos = pstartbyte;
1174         if (!string_p)
1175           pos = BYTE_TO_CHAR (bytepos);
1176         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop_p,
1177                               &bidi_it->string,
1178                               bidi_it->frame_window_p, &ch_len, &nchars);
1179         type = bidi_get_type (ch, NEUTRAL_DIR);
1180
             /* Scan forward until a character that decides the
                direction is found.  POS/BYTEPOS always point past
                the character whose type is in TYPE.  */
1181         for (pos += nchars, bytepos += ch_len;
1182              (bidi_get_category (type) != STRONG)
1183                || (bidi_ignore_explicit_marks_for_paragraph_level
1184                    && (type == RLE || type == RLO
1185                        || type == LRE || type == LRO));
1186              type = bidi_get_type (ch, NEUTRAL_DIR))
1187           {
1188             if (pos >= end)
1189               {
1190                 /* Pretend there's a paragraph separator at end of
1191                    buffer/string.  */
1192                 type = NEUTRAL_B;
1193                 break;
1194               }
                 /* In a buffer, stop at the end of this paragraph or
                    at the beginning of the next one.  */
1195             if (!string_p
1196                 && type == NEUTRAL_B
1197                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1198               break;
1199             /* Fetch next character and advance to get past it.  */
1200             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
1201                                   &disp_prop_p, &bidi_it->string,
1202                                   bidi_it->frame_window_p, &ch_len, &nchars);
1203             pos += nchars;
1204             bytepos += ch_len;
1205           }
1206         if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1207             || (!bidi_ignore_explicit_marks_for_paragraph_level
1208                 && (type == RLO || type == RLE)))
1209           bidi_it->paragraph_dir = R2L;
1210         else if (type == STRONG_L
1211                  || (!bidi_ignore_explicit_marks_for_paragraph_level
1212                      && (type == LRO || type == LRE)))
1213           bidi_it->paragraph_dir = L2R;
1214         if (!string_p
1215             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1216           {
1217             /* If this paragraph is at BEGV, default to L2R.  */
1218             if (pstartbyte == BEGV_BYTE)
1219               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1220             else
1221               {
1222                 EMACS_INT prevpbyte = pstartbyte;
1223                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1224
1225                 /* Find the beginning of the previous paragraph, if any.  */
1226                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1227                   {
1228                     /* FIXME: What if p is covered by a display
1229                        string?  See also a FIXME inside
1230                        bidi_find_paragraph_start.  */
1231                     p--;
1232                     pbyte = CHAR_TO_BYTE (p);
1233                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
1234                   }
1235                 pstartbyte = prevpbyte;
1236               }
1237           }
1238       } while (!string_p
1239                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1240     }
1241   else
1242     abort ();
1243
1244   /* Contrary to UAX#9 clause P3, we only default the paragraph
1245      direction to L2R if we have no previous usable paragraph
1246      direction.  This is allowed by the HL1 clause.  */
1247   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1248     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1249   if (bidi_it->paragraph_dir == R2L)
1250     bidi_it->level_stack[0].level = 1;
1251   else
1252     bidi_it->level_stack[0].level = 0;
1253
1254   bidi_line_init (bidi_it);
1255 }
1256
1257 \f
1258 /***********************************************************************
1259                  Resolving explicit and implicit levels.
1260   The rest of this file constitutes the core of the UBA implementation.
1261  ***********************************************************************/
1262
1263 static inline int
1264 bidi_explicit_dir_char (int ch)
1265 {
       /* Return non-zero if the bidi type recorded for character CH in
          bidi_type_table is one of the explicit directional formatting
          codes LRE, LRO, RLE, RLO or PDF.  Aborts if called before the
          bidi code was initialized (bidi_initialized is zero).  */
1266   bidi_type_t ch_type;
1267
1268   if (!bidi_initialized)
1269     abort ();
1270   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1271   return (ch_type == LRE || ch_type == LRO
1272           || ch_type == RLE || ch_type == RLO
1273           || ch_type == PDF);
1274 }
1275
1276 /* A helper function for bidi_resolve_explicit.  It advances to the
1277    next character in logical order and determines the new embedding
1278    level and directional override, but does not take into account
1279    empty embeddings.

        Value is the embedding level in effect after the character
        was processed.  As side effects, the function records the
        fetched character in BIDI_IT->ch, its unmodified bidi type in
        BIDI_IT->orig_type, and the type to be used by the following
        resolution steps in BIDI_IT->type.  For the explicit
        formatting codes RLE, RLO, LRE, LRO and PDF, the original
        type is also stored in BIDI_IT->type_after_w1, and
        BIDI_IT->type becomes WEAK_BN, or WEAK_EN in the W5/Retaining
        cases below.  The embedding level stack and the
        invalid_levels/invalid_rl_levels counters are updated only
        while BIDI_IT->ignore_bn_limit is -1 or less.  */
1280 static int
1281 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1282 {
1283   int curchar;
1284   bidi_type_t type;
1285   int current_level;
1286   int new_level;
1287   bidi_dir_t override;
1288   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1289
1290   /* If reseat()'ed, don't advance, so as to start iteration from the
1291      position where we were reseated.  bidi_it->bytepos can be less
1292      than BEGV_BYTE after reseat to BEGV.  */
1293   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1294       || bidi_it->first_elt)
1295     {
1296       bidi_it->first_elt = 0;
           /* Clamp the character position to the beginning of the
              string/buffer and recompute the byte position from it.  */
1297       if (string_p)
1298         {
1299           const unsigned char *p =
1300             STRINGP (bidi_it->string.lstring)
1301             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1302
1303           if (bidi_it->charpos < 0)
1304             bidi_it->charpos = 0;
1305           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1306                                                bidi_it->string.unibyte);
1307         }
1308       else
1309         {
1310           if (bidi_it->charpos < BEGV)
1311             bidi_it->charpos = BEGV;
1312           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1313         }
1314     }
1315   /* Don't move at end of buffer/string.  */
1316   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1317     {
1318       /* Advance to the next character, skipping characters covered by
1319          display strings (nchars > 1).  */
1320       if (bidi_it->nchars <= 0)
1321         abort ();
1322       bidi_it->charpos += bidi_it->nchars;
1323       if (bidi_it->ch_len == 0)
1324         abort ();
1325       bidi_it->bytepos += bidi_it->ch_len;
1326     }
1327
1328   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1329   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1330   new_level = current_level;
1331
1332   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1333     {
           /* At the end of the text; these are the same values that
              bidi_fetch_char produces there.  */
1334       curchar = BIDI_EOB;
1335       bidi_it->ch_len = 1;
1336       bidi_it->nchars = 1;
1337       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1338       bidi_it->disp_prop_p = 0;
1339     }
1340   else
1341     {
1342       /* Fetch the character at BYTEPOS.  If it is covered by a
1343          display string, treat the entire run of covered characters as
1344          a single character u+FFFC.  */
1345       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1346                                  &bidi_it->disp_pos, &bidi_it->disp_prop_p,
1347                                  &bidi_it->string, bidi_it->frame_window_p,
1348                                  &bidi_it->ch_len, &bidi_it->nchars);
1349     }
1350   bidi_it->ch = curchar;
1351
1352   /* Don't apply directional override here, as all the types we handle
1353      below will not be affected by the override anyway, and we need
1354      the original type unaltered.  The override will be applied in
1355      bidi_resolve_weak.  */
1356   type = bidi_get_type (curchar, NEUTRAL_DIR);
1357   bidi_it->orig_type = type;
1358   bidi_check_type (bidi_it->orig_type);
1359
1360   if (type != PDF)
1361     bidi_it->prev_was_pdf = 0;
1362
1363   bidi_it->type_after_w1 = UNKNOWN_BT;
1364
1365   switch (type)
1366     {
1367       case RLE: /* X2 */
1368       case RLO: /* X4 */
1369         bidi_it->type_after_w1 = type;
1370         bidi_check_type (bidi_it->type_after_w1);
1371         type = WEAK_BN; /* X9/Retaining */
1372         if (bidi_it->ignore_bn_limit <= -1)
1373           {
1374             if (current_level <= BIDI_MAXLEVEL - 4)
1375               {
1376                 /* Compute the least odd embedding level greater than
1377                    the current level.  */
1378                 new_level = ((current_level + 1) & ~1) + 1;
1379                 if (bidi_it->type_after_w1 == RLE)
1380                   override = NEUTRAL_DIR;
1381                 else
1382                   override = R2L;
1383                 if (current_level == BIDI_MAXLEVEL - 4)
1384                   bidi_it->invalid_rl_levels = 0;
1385                 bidi_push_embedding_level (bidi_it, new_level, override);
1386               }
1387             else
1388               {
1389                 bidi_it->invalid_levels++;
1390                 /* See the commentary about invalid_rl_levels below.  */
1391                 if (bidi_it->invalid_rl_levels < 0)
1392                   bidi_it->invalid_rl_levels = 0;
1393                 bidi_it->invalid_rl_levels++;
1394               }
1395           }
1396         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1397                  || bidi_it->next_en_pos > bidi_it->charpos)
1398           type = WEAK_EN;
1399         break;
1400       case LRE: /* X3 */
1401       case LRO: /* X5 */
1402         bidi_it->type_after_w1 = type;
1403         bidi_check_type (bidi_it->type_after_w1);
1404         type = WEAK_BN; /* X9/Retaining */
1405         if (bidi_it->ignore_bn_limit <= -1)
1406           {
1407             if (current_level <= BIDI_MAXLEVEL - 5)
1408               {
1409                 /* Compute the least even embedding level greater than
1410                    the current level.  */
1411                 new_level = ((current_level + 2) & ~1);
1412                 if (bidi_it->type_after_w1 == LRE)
1413                   override = NEUTRAL_DIR;
1414                 else
1415                   override = L2R;
1416                 bidi_push_embedding_level (bidi_it, new_level, override);
1417               }
1418             else
1419               {
1420                 bidi_it->invalid_levels++;
1421                 /* invalid_rl_levels counts invalid levels encountered
1422                    while the embedding level was already too high for
1423                    LRE/LRO, but not for RLE/RLO.  That is because
1424                    there may be exactly one PDF which we should not
1425                    ignore even though invalid_levels is non-zero.
1426                    invalid_rl_levels helps to know what PDF is
1427                    that.  */
1428                 if (bidi_it->invalid_rl_levels >= 0)
1429                   bidi_it->invalid_rl_levels++;
1430               }
1431           }
1432         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1433                  || bidi_it->next_en_pos > bidi_it->charpos)
1434           type = WEAK_EN;
1435         break;
1436       case PDF: /* X7 */
1437         bidi_it->type_after_w1 = type;
1438         bidi_check_type (bidi_it->type_after_w1);
1439         type = WEAK_BN; /* X9/Retaining */
1440         if (bidi_it->ignore_bn_limit <= -1)
1441           {
1442             if (!bidi_it->invalid_rl_levels)
1443               {
1444                 new_level = bidi_pop_embedding_level (bidi_it);
1445                 bidi_it->invalid_rl_levels = -1;
1446                 if (bidi_it->invalid_levels)
1447                   bidi_it->invalid_levels--;
1448                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1449               }
1450             if (!bidi_it->invalid_levels)
1451               new_level = bidi_pop_embedding_level (bidi_it);
1452             else
1453               {
1454                 bidi_it->invalid_levels--;
1455                 bidi_it->invalid_rl_levels--;
1456               }
1457           }
1458         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1459                  || bidi_it->next_en_pos > bidi_it->charpos)
1460           type = WEAK_EN;
1461         break;
1462       default:
1463         /* Nothing.  */
1464         break;
1465     }
1466
1467   bidi_it->type = type;
1468   bidi_check_type (bidi_it->type);
1469
1470   return new_level;
1471 }
1472
1473 /* Given an iterator state in BIDI_IT, advance one character position
1474    in the buffer/string to the next character (in the logical order),
1475    resolve any explicit embeddings and directional overrides, and
1476    return the embedding level of the character after resolving
1477    explicit directives and ignoring empty embeddings.

        An embedding is considered empty when the explicit formatting
        codes that follow the one just processed bring the level back
        to where it was before it.  In that case the level pushed for
        it is popped again, and BIDI_IT->ignore_bn_limit is set to the
        character position just past the run of formatting codes.  If
        the embedding turns out to be non-empty, ignore_bn_limit is set
        to -2, so that the look-ahead is not repeated (it is only done
        while ignore_bn_limit is exactly -1).  The look-ahead itself
        works on BIDI_IT and is undone by restoring a saved copy.

        If the character is a paragraph separator (NEUTRAL_B), the
        iterator is put into the end-of-paragraph state (X8).  */
1478 static int
1479 bidi_resolve_explicit (struct bidi_it *bidi_it)
1480 {
1481   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1482   int new_level  = bidi_resolve_explicit_1 (bidi_it);
       /* End of the text, as a character position.  string.s is NULL
          when iterating over a Lisp string, so string.lstring must be
          tested as well, like bidi_fetch_char and bidi_resolve_weak
          do; otherwise a string index would be compared with ZV.  */
1483   EMACS_INT eob =
         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
         ? bidi_it->string.schars : ZV;
1484   const unsigned char *s = STRINGP (bidi_it->string.lstring)
1485     ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1486
1487   if (prev_level < new_level
1488       && bidi_it->type == WEAK_BN
1489       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1490       && bidi_it->charpos < eob         /* not already at EOB */
1491       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1492                                                    + bidi_it->ch_len, s,
1493                                                    bidi_it->string.unibyte)))
1494     {
1495       /* Avoid pushing and popping embedding levels if the level run
1496          is empty, as this breaks level runs where it shouldn't.
1497          UAX#9 removes all the explicit embedding and override codes,
1498          so empty embeddings disappear without a trace.  We need to
1499          behave as if we did the same.  */
1500       struct bidi_it saved_it;
1501       int level = prev_level;
1502
1503       bidi_copy_it (&saved_it, bidi_it);
1504
           /* Look ahead over the whole run of explicit formatting
              codes, tracking the level they leave us at.  */
1505       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1506                                                        + bidi_it->ch_len, s,
1507                                                        bidi_it->string.unibyte)))
1508         {
1509           /* This advances to the next character, skipping any
1510              characters covered by display strings.  */
1511           level = bidi_resolve_explicit_1 (bidi_it);
1512           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1513              a pointer to its data is no longer valid.  */
1514           if (STRINGP (bidi_it->string.lstring))
1515             s = SDATA (bidi_it->string.lstring);
1516         }
1517
1518       if (bidi_it->nchars <= 0)
1519         abort ();
1520       if (level == prev_level)  /* empty embedding */
1521         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1522       else                      /* this embedding is non-empty */
1523         saved_it.ignore_bn_limit = -2;
1524
           /* Undo the look-ahead; only ignore_bn_limit survives it.  */
1525       bidi_copy_it (bidi_it, &saved_it);
1526       if (bidi_it->ignore_bn_limit > -1)
1527         {
1528           /* We pushed a level, but we shouldn't have.  Undo that. */
1529           if (!bidi_it->invalid_rl_levels)
1530             {
1531               new_level = bidi_pop_embedding_level (bidi_it);
1532               bidi_it->invalid_rl_levels = -1;
1533               if (bidi_it->invalid_levels)
1534                 bidi_it->invalid_levels--;
1535             }
1536           if (!bidi_it->invalid_levels)
1537             new_level = bidi_pop_embedding_level (bidi_it);
1538           else
1539             {
1540               bidi_it->invalid_levels--;
1541               bidi_it->invalid_rl_levels--;
1542             }
1543         }
1544     }
1545
1546   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1547     {
1548       bidi_set_paragraph_end (bidi_it);
1549       /* This is needed by bidi_resolve_weak below, and in L1.  */
1550       bidi_it->type_after_w1 = bidi_it->type;
1551       bidi_check_type (bidi_it->type_after_w1);
1552     }
1553
1554   return new_level;
1555 }
1556
1557 /* Advance in the buffer/string, resolve weak types and return the
1558    type of the next character after weak type resolution.  */
1559 static bidi_type_t
1560 bidi_resolve_weak (struct bidi_it *bidi_it)
1561 {
1562   bidi_type_t type;
1563   bidi_dir_t override;
1564   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1565   int new_level  = bidi_resolve_explicit (bidi_it);
1566   int next_char;
1567   bidi_type_t type_of_next;
1568   struct bidi_it saved_it;
1569   EMACS_INT eob =
1570     (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1571     ? bidi_it->string.schars : ZV;
1572
1573   type = bidi_it->type;
1574   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1575
1576   if (type == UNKNOWN_BT
1577       || type == LRE
1578       || type == LRO
1579       || type == RLE
1580       || type == RLO
1581       || type == PDF)
1582     abort ();
1583
1584   if (new_level != prev_level
1585       || bidi_it->type == NEUTRAL_B)
1586     {
1587       /* We've got a new embedding level run, compute the directional
1588          type of sor and initialize per-run variables (UAX#9, clause
1589          X10).  */
1590       bidi_set_sor_type (bidi_it, prev_level, new_level);
1591     }
1592   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1593            || type == WEAK_BN || type == STRONG_AL)
1594     bidi_it->type_after_w1 = type;      /* needed in L1 */
1595   bidi_check_type (bidi_it->type_after_w1);
1596
1597   /* Level and directional override status are already recorded in
1598      bidi_it, and do not need any change; see X6.  */
1599   if (override == R2L)          /* X6 */
1600     type = STRONG_R;
1601   else if (override == L2R)
1602     type = STRONG_L;
1603   else
1604     {
1605       if (type == WEAK_NSM)     /* W1 */
1606         {
1607           /* Note that we don't need to consider the case where the
1608              prev character has its type overridden by an RLO or LRO,
1609              because then either the type of this NSM would have been
1610              also overridden, or the previous character is outside the
1611              current level run, and thus not relevant to this NSM.
1612              This is why NSM gets the type_after_w1 of the previous
1613              character.  */
1614           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1615               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1616               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1617             type = bidi_it->prev.type_after_w1;
1618           else if (bidi_it->sor == R2L)
1619             type = STRONG_R;
1620           else if (bidi_it->sor == L2R)
1621             type = STRONG_L;
1622           else /* shouldn't happen! */
1623             abort ();
1624         }
1625       if (type == WEAK_EN       /* W2 */
1626           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1627         type = WEAK_AN;
1628       else if (type == STRONG_AL) /* W3 */
1629         type = STRONG_R;
1630       else if ((type == WEAK_ES /* W4 */
1631                 && bidi_it->prev.type_after_w1 == WEAK_EN
1632                 && bidi_it->prev.orig_type == WEAK_EN)
1633                || (type == WEAK_CS
1634                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1635                         && bidi_it->prev.orig_type == WEAK_EN)
1636                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1637         {
1638           const unsigned char *s =
1639             STRINGP (bidi_it->string.lstring)
1640             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1641
1642           next_char =
1643             bidi_it->charpos + bidi_it->nchars >= eob
1644             ? BIDI_EOB
1645             : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1646                                 bidi_it->string.unibyte);
1647           type_of_next = bidi_get_type (next_char, override);
1648
1649           if (type_of_next == WEAK_BN
1650               || bidi_explicit_dir_char (next_char))
1651             {
1652               bidi_copy_it (&saved_it, bidi_it);
1653               while (bidi_resolve_explicit (bidi_it) == new_level
1654                      && bidi_it->type == WEAK_BN)
1655                 ;
1656               type_of_next = bidi_it->type;
1657               bidi_copy_it (bidi_it, &saved_it);
1658             }
1659
1660           /* If the next character is EN, but the last strong-type
1661              character is AL, that next EN will be changed to AN when
1662              we process it in W2 above.  So in that case, this ES
1663              should not be changed into EN.  */
1664           if (type == WEAK_ES
1665               && type_of_next == WEAK_EN
1666               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1667             type = WEAK_EN;
1668           else if (type == WEAK_CS)
1669             {
1670               if (bidi_it->prev.type_after_w1 == WEAK_AN
1671                   && (type_of_next == WEAK_AN
1672                       /* If the next character is EN, but the last
1673                          strong-type character is AL, EN will be later
1674                          changed to AN when we process it in W2 above.
1675                          So in that case, this ES should not be
1676                          changed into EN.  */
1677                       || (type_of_next == WEAK_EN
1678                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1679                 type = WEAK_AN;
1680               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1681                        && type_of_next == WEAK_EN
1682                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1683                 type = WEAK_EN;
1684             }
1685         }
1686       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1687                || type == WEAK_BN)      /* W5/Retaining */
1688         {
1689           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1690               || bidi_it->next_en_pos > bidi_it->charpos)
1691             type = WEAK_EN;
1692           else                  /* W5: ET/BN with EN after it.  */
1693             {
1694               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1695               const unsigned char *s =
1696                 STRINGP (bidi_it->string.lstring)
1697                 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1698
1699               if (bidi_it->nchars <= 0)
1700                 abort ();
1701               next_char =
1702                 bidi_it->charpos + bidi_it->nchars >= eob
1703                 ? BIDI_EOB
1704                 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1705                                     bidi_it->string.unibyte);
1706               type_of_next = bidi_get_type (next_char, override);
1707
1708               if (type_of_next == WEAK_ET
1709                   || type_of_next == WEAK_BN
1710                   || bidi_explicit_dir_char (next_char))
1711                 {
1712                   bidi_copy_it (&saved_it, bidi_it);
1713                   while (bidi_resolve_explicit (bidi_it) == new_level
1714                          && (bidi_it->type == WEAK_BN
1715                              || bidi_it->type == WEAK_ET))
1716                     ;
1717                   type_of_next = bidi_it->type;
1718                   en_pos = bidi_it->charpos;
1719                   bidi_copy_it (bidi_it, &saved_it);
1720                 }
1721               if (type_of_next == WEAK_EN)
1722                 {
1723                   /* If the last strong character is AL, the EN we've
1724                      found will become AN when we get to it (W2). */
1725                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1726                     {
1727                       type = WEAK_EN;
1728                       /* Remember this EN position, to speed up processing
1729                          of the next ETs.  */
1730                       bidi_it->next_en_pos = en_pos;
1731                     }
1732                   else if (type == WEAK_BN)
1733                     type = NEUTRAL_ON; /* W6/Retaining */
1734                 }
1735             }
1736         }
1737     }
1738
1739   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1740       || (type == WEAK_BN
1741           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1742               || bidi_it->prev.type_after_w1 == WEAK_ES
1743               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1744     type = NEUTRAL_ON;
1745
1746   /* Store the type we've got so far, before we clobber it with strong
1747      types in W7 and while resolving neutral types.  But leave alone
1748      the original types that were recorded above, because we will need
1749      them for the L1 clause.  */
1750   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1751     bidi_it->type_after_w1 = type;
1752   bidi_check_type (bidi_it->type_after_w1);
1753
1754   if (type == WEAK_EN)  /* W7 */
1755     {
1756       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1757           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1758         type = STRONG_L;
1759     }
1760
1761   bidi_it->type = type;
1762   bidi_check_type (bidi_it->type);
1763   return type;
1764 }
1765
1766 /* Resolve the type of a neutral character according to the type of
1767    surrounding strong text and the current embedding level.  */
1768 static inline bidi_type_t
1769 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1770 {
1771   /* N1: European and Arabic numbers are treated as though they were R.  */
1772   if (next_type == WEAK_EN || next_type == WEAK_AN)
1773     next_type = STRONG_R;
1774   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1775     prev_type = STRONG_R;
1776
1777   if (next_type == prev_type)   /* N1 */
1778     return next_type;
1779   else if ((lev & 1) == 0)      /* N2 */
1780     return STRONG_L;
1781   else
1782     return STRONG_R;
1783 }
1784
1785 static bidi_type_t
1786 bidi_resolve_neutral (struct bidi_it *bidi_it)
1787 {
1788   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1789   bidi_type_t type = bidi_resolve_weak (bidi_it);
1790   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1791
1792   if (!(type == STRONG_R
1793         || type == STRONG_L
1794         || type == WEAK_BN
1795         || type == WEAK_EN
1796         || type == WEAK_AN
1797         || type == NEUTRAL_B
1798         || type == NEUTRAL_S
1799         || type == NEUTRAL_WS
1800         || type == NEUTRAL_ON))
1801     abort ();
1802
1803   if (bidi_get_category (type) == NEUTRAL
1804       || (type == WEAK_BN && prev_level == current_level))
1805     {
1806       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1807         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1808                                        bidi_it->next_for_neutral.type,
1809                                        current_level);
1810       else
1811         {
1812           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1813              the assumption of batch-style processing; see clauses W4,
1814              W5, and especially N1, which require to look far forward
1815              (as well as back) in the buffer/string.  May the fleas of
1816              a thousand camels infest the armpits of those who design
1817              supposedly general-purpose algorithms by looking at their
1818              own implementations, and fail to consider other possible
1819              implementations!  */
1820           struct bidi_it saved_it;
1821           bidi_type_t next_type;
1822
1823           if (bidi_it->scan_dir == -1)
1824             abort ();
1825
1826           bidi_copy_it (&saved_it, bidi_it);
1827           /* Scan the text forward until we find the first non-neutral
1828              character, and then use that to resolve the neutral we
1829              are dealing with now.  We also cache the scanned iterator
1830              states, to salvage some of the effort later.  */
1831           bidi_cache_iterator_state (bidi_it, 0);
1832           do {
1833             /* Record the info about the previous character, so that
1834                it will be cached below with this state.  */
1835             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1836                 && bidi_it->type != WEAK_BN)
1837               bidi_remember_char (&bidi_it->prev, bidi_it);
1838             type = bidi_resolve_weak (bidi_it);
1839             /* Paragraph separators have their levels fully resolved
1840                at this point, so cache them as resolved.  */
1841             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1842             /* FIXME: implement L1 here, by testing for a newline and
1843                resetting the level for any sequence of whitespace
1844                characters adjacent to it.  */
1845           } while (!(type == NEUTRAL_B
1846                      || (type != WEAK_BN
1847                          && bidi_get_category (type) != NEUTRAL)
1848                      /* This is all per level run, so stop when we
1849                         reach the end of this level run.  */
1850                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1851                      current_level));
1852
1853           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1854
1855           switch (type)
1856             {
1857               case STRONG_L:
1858               case STRONG_R:
1859               case STRONG_AL:
1860                 next_type = type;
1861                 break;
1862               case WEAK_EN:
1863               case WEAK_AN:
1864                 /* N1: ``European and Arabic numbers are treated as
1865                    though they were R.''  */
1866                 next_type = STRONG_R;
1867                 saved_it.next_for_neutral.type = STRONG_R;
1868                 break;
1869               case WEAK_BN:
1870                 if (!bidi_explicit_dir_char (bidi_it->ch))
1871                   abort ();             /* can't happen: BNs are skipped */
1872                 /* FALLTHROUGH */
1873               case NEUTRAL_B:
1874                 /* Marched all the way to the end of this level run.
1875                    We need to use the eor type, whose information is
1876                    stored by bidi_set_sor_type in the prev_for_neutral
1877                    member.  */
1878                 if (saved_it.type != WEAK_BN
1879                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1880                   {
1881                     next_type = bidi_it->prev_for_neutral.type;
1882                     saved_it.next_for_neutral.type = next_type;
1883                     bidi_check_type (next_type);
1884                   }
1885                 else
1886                   {
1887                     /* This is a BN which does not adjoin neutrals.
1888                        Leave its type alone.  */
1889                     bidi_copy_it (bidi_it, &saved_it);
1890                     return bidi_it->type;
1891                   }
1892                 break;
1893               default:
1894                 abort ();
1895             }
1896           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1897                                          next_type, current_level);
1898           saved_it.type = type;
1899           bidi_check_type (type);
1900           bidi_copy_it (bidi_it, &saved_it);
1901         }
1902     }
1903   return type;
1904 }
1905
1906 /* Given an iterator state in BIDI_IT, advance one character position
1907    in the buffer/string to the next character (in the logical order),
1908    resolve the bidi type of that next character, and return that
1909    type.  */
1910 static bidi_type_t
1911 bidi_type_of_next_char (struct bidi_it *bidi_it)
1912 {
1913   bidi_type_t type;
1914
1915   /* This should always be called during a forward scan.  */
1916   if (bidi_it->scan_dir != 1)
1917     abort ();
1918
1919   /* Reset the limit until which to ignore BNs if we step out of the
1920      area where we found only empty levels.  */
1921   if ((bidi_it->ignore_bn_limit > -1
1922        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1923       || (bidi_it->ignore_bn_limit == -2
1924           && !bidi_explicit_dir_char (bidi_it->ch)))
1925     bidi_it->ignore_bn_limit = -1;
1926
1927   type = bidi_resolve_neutral (bidi_it);
1928
1929   return type;
1930 }
1931
1932 /* Given an iterator state BIDI_IT, advance one character position in
1933    the buffer/string to the next character (in the current scan
1934    direction), resolve the embedding and implicit levels of that next
1935    character, and return the resulting level.  */
1936 static int
1937 bidi_level_of_next_char (struct bidi_it *bidi_it)
1938 {
1939   bidi_type_t type;
1940   int level, prev_level = -1;
1941   struct bidi_saved_info next_for_neutral;
1942   EMACS_INT next_char_pos = -2;
1943
1944   if (bidi_it->scan_dir == 1)
1945     {
1946       EMACS_INT eob =
1947         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1948         ? bidi_it->string.schars : ZV;
1949
1950       /* There's no sense in trying to advance if we hit end of text.  */
1951       if (bidi_it->charpos >= eob)
1952         return bidi_it->resolved_level;
1953
1954       /* Record the info about the previous character.  */
1955       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1956           && bidi_it->type != WEAK_BN)
1957         bidi_remember_char (&bidi_it->prev, bidi_it);
1958       if (bidi_it->type_after_w1 == STRONG_R
1959           || bidi_it->type_after_w1 == STRONG_L
1960           || bidi_it->type_after_w1 == STRONG_AL)
1961         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1962       /* FIXME: it sounds like we don't need both prev and
1963          prev_for_neutral members, but I'm leaving them both for now.  */
1964       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1965           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1966         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1967
1968       /* If we overstepped the characters used for resolving neutrals
1969          and whitespace, invalidate their info in the iterator.  */
1970       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1971         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1972       if (bidi_it->next_en_pos >= 0
1973           && bidi_it->charpos >= bidi_it->next_en_pos)
1974         bidi_it->next_en_pos = -1;
1975       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1976           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1977         bidi_it->next_for_ws.type = UNKNOWN_BT;
1978
1979       /* This must be taken before we fill the iterator with the info
1980          about the next char.  If we scan backwards, the iterator
1981          state must be already cached, so there's no need to know the
1982          embedding level of the previous character, since we will be
1983          returning to our caller shortly.  */
1984       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1985     }
1986   next_for_neutral = bidi_it->next_for_neutral;
1987
1988   /* Perhaps the character we want is already cached.  If it is, the
1989      call to bidi_cache_find below will return a type other than
1990      UNKNOWN_BT.  */
1991   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
1992     {
1993       int bob =
1994         (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
1995
1996       if (bidi_it->scan_dir > 0)
1997         {
1998           if (bidi_it->nchars <= 0)
1999             abort ();
2000           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2001         }
2002       else if (bidi_it->charpos >= bob)
2003         /* Implementation note: we allow next_char_pos to be as low as
2004            0 for buffers or -1 for strings, and that is okay because
2005            that's the "position" of the sentinel iterator state we
2006            cached at the beginning of the iteration.  */
2007         next_char_pos = bidi_it->charpos - 1;
2008       if (next_char_pos >= bob - 1)
2009         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2010       else
2011         type = UNKNOWN_BT;
2012     }
2013   else
2014     type = UNKNOWN_BT;
2015   if (type != UNKNOWN_BT)
2016     {
2017       /* Don't lose the information for resolving neutrals!  The
2018          cached states could have been cached before their
2019          next_for_neutral member was computed.  If we are on our way
2020          forward, we can simply take the info from the previous
2021          state.  */
2022       if (bidi_it->scan_dir == 1
2023           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2024         bidi_it->next_for_neutral = next_for_neutral;
2025
2026       /* If resolved_level is -1, it means this state was cached
2027          before it was completely resolved, so we cannot return
2028          it.  */
2029       if (bidi_it->resolved_level != -1)
2030         return bidi_it->resolved_level;
2031     }
2032   if (bidi_it->scan_dir == -1)
2033     /* If we are going backwards, the iterator state is already cached
2034        from previous scans, and should be fully resolved.  */
2035     abort ();
2036
2037   if (type == UNKNOWN_BT)
2038     type = bidi_type_of_next_char (bidi_it);
2039
2040   if (type == NEUTRAL_B)
2041     return bidi_it->resolved_level;
2042
2043   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2044   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2045       || (type == WEAK_BN && prev_level == level))
2046     {
2047       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2048         abort ();
2049
2050       /* If the cached state shows a neutral character, it was not
2051          resolved by bidi_resolve_neutral, so do it now.  */
2052       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2053                                      bidi_it->next_for_neutral.type,
2054                                      level);
2055     }
2056
2057   if (!(type == STRONG_R
2058         || type == STRONG_L
2059         || type == WEAK_BN
2060         || type == WEAK_EN
2061         || type == WEAK_AN))
2062     abort ();
2063   bidi_it->type = type;
2064   bidi_check_type (bidi_it->type);
2065
2066   /* For L1 below, we need to know, for each WS character, whether
2067      it belongs to a sequence of WS characters preceding a newline
2068      or a TAB or a paragraph separator.  */
2069   if (bidi_it->orig_type == NEUTRAL_WS
2070       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2071     {
2072       int ch;
2073       EMACS_INT clen = bidi_it->ch_len;
2074       EMACS_INT bpos = bidi_it->bytepos;
2075       EMACS_INT cpos = bidi_it->charpos;
2076       EMACS_INT disp_pos = bidi_it->disp_pos;
2077       EMACS_INT nc = bidi_it->nchars;
2078       struct bidi_string_data bs = bidi_it->string;
2079       bidi_type_t chtype;
2080       int fwp = bidi_it->frame_window_p;
2081       int dpp = bidi_it->disp_prop_p;
2082
2083       if (bidi_it->nchars <= 0)
2084         abort ();
2085       do {
2086         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2087                               fwp, &clen, &nc);
2088         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
2089           chtype = NEUTRAL_B;
2090         else
2091           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2092       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2093                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2094       bidi_it->next_for_ws.type = chtype;
2095       bidi_check_type (bidi_it->next_for_ws.type);
2096       bidi_it->next_for_ws.charpos = cpos;
2097       bidi_it->next_for_ws.bytepos = bpos;
2098     }
2099
2100   /* Resolve implicit levels, with a twist: PDFs get the embedding
2101      level of the enbedding they terminate.  See below for the
2102      reason.  */
2103   if (bidi_it->orig_type == PDF
2104       /* Don't do this if this formatting code didn't change the
2105          embedding level due to invalid or empty embeddings.  */
2106       && prev_level != level)
2107     {
2108       /* Don't look in UAX#9 for the reason for this: it's our own
2109          private quirk.  The reason is that we want the formatting
2110          codes to be delivered so that they bracket the text of their
2111          embedding.  For example, given the text
2112
2113              {RLO}teST{PDF}
2114
2115          we want it to be displayed as
2116
2117              {PDF}STet{RLO}
2118
2119          not as
2120
2121              STet{RLO}{PDF}
2122
2123          which will result because we bump up the embedding level as
2124          soon as we see the RLO and pop it as soon as we see the PDF,
2125          so RLO itself has the same embedding level as "teST", and
2126          thus would be normally delivered last, just before the PDF.
2127          The switch below fiddles with the level of PDF so that this
2128          ugly side effect does not happen.
2129
2130          (This is, of course, only important if the formatting codes
2131          are actually displayed, but Emacs does need to display them
2132          if the user wants to.)  */
2133       level = prev_level;
2134     }
2135   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2136            || bidi_it->orig_type == NEUTRAL_S
2137            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2138            /* || bidi_it->ch == LINESEP_CHAR */
2139            || (bidi_it->orig_type == NEUTRAL_WS
2140                && (bidi_it->next_for_ws.type == NEUTRAL_B
2141                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2142     level = bidi_it->level_stack[0].level;
2143   else if ((level & 1) == 0) /* I1 */
2144     {
2145       if (type == STRONG_R)
2146         level++;
2147       else if (type == WEAK_EN || type == WEAK_AN)
2148         level += 2;
2149     }
2150   else                  /* I2 */
2151     {
2152       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2153         level++;
2154     }
2155
2156   bidi_it->resolved_level = level;
2157   return level;
2158 }
2159
2160 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2161    non-zero, we are at the end of a level, and we need to prepare to
2162    resume the scan of the lower level.
2163
2164    If this level's other edge is cached, we simply jump to it, filling
2165    the iterator structure with the iterator state on the other edge.
2166    Otherwise, we walk the buffer or string until we come back to the
2167    same level as LEVEL.
2168
2169    Note: we are not talking here about a ``level run'' in the UAX#9
2170    sense of the term, but rather about a ``level'' which includes
2171    all the levels higher than it.  In other words, given the levels
2172    like this:
2173
2174          11111112222222333333334443343222222111111112223322111
2175                 A      B                    C
2176
2177    and assuming we are at point A scanning left to right, this
2178    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2179    at point B.  */
2180 static void
2181 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2182 {
2183   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2184   ptrdiff_t idx;
2185
2186   /* Try the cache first.  */
2187   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2188       >= bidi_cache_start)
2189     bidi_cache_fetch_state (idx, bidi_it);
2190   else
2191     {
2192       int new_level;
2193
2194       if (end_flag)
2195         abort (); /* if we are at end of level, its edges must be cached */
2196
2197       bidi_cache_iterator_state (bidi_it, 1);
2198       do {
2199         new_level = bidi_level_of_next_char (bidi_it);
2200         bidi_cache_iterator_state (bidi_it, 1);
2201       } while (new_level >= level);
2202     }
2203 }
2204
2205 void
2206 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2207 {
2208   int old_level, new_level, next_level;
2209   struct bidi_it sentinel;
2210   struct gcpro gcpro1;
2211
2212   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2213     abort ();
2214
2215   if (bidi_it->scan_dir == 0)
2216     {
2217       bidi_it->scan_dir = 1;    /* default to logical order */
2218     }
2219
2220   /* The code below can call eval, and thus cause GC.  If we are
2221      iterating a Lisp string, make sure it won't be GCed.  */
2222   if (STRINGP (bidi_it->string.lstring))
2223     GCPRO1 (bidi_it->string.lstring);
2224
2225   /* If we just passed a newline, initialize for the next line.  */
2226   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
2227     bidi_line_init (bidi_it);
2228
2229   /* Prepare the sentinel iterator state, and cache it.  When we bump
2230      into it, scanning backwards, we'll know that the last non-base
2231      level is exhausted.  */
2232   if (bidi_cache_idx == bidi_cache_start)
2233     {
2234       bidi_copy_it (&sentinel, bidi_it);
2235       if (bidi_it->first_elt)
2236         {
2237           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2238           sentinel.bytepos--;
2239           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2240           sentinel.ch_len = 1;
2241           sentinel.nchars = 1;
2242         }
2243       bidi_cache_iterator_state (&sentinel, 1);
2244     }
2245
2246   old_level = bidi_it->resolved_level;
2247   new_level = bidi_level_of_next_char (bidi_it);
2248
2249   /* Reordering of resolved levels (clause L2) is implemented by
2250      jumping to the other edge of the level and flipping direction of
2251      scanning the text whenever we find a level change.  */
2252   if (new_level != old_level)
2253     {
2254       int ascending = new_level > old_level;
2255       int level_to_search = ascending ? old_level + 1 : old_level;
2256       int incr = ascending ? 1 : -1;
2257       int expected_next_level = old_level + incr;
2258
2259       /* Jump (or walk) to the other edge of this level.  */
2260       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2261       /* Switch scan direction and peek at the next character in the
2262          new direction.  */
2263       bidi_it->scan_dir = -bidi_it->scan_dir;
2264
2265       /* The following loop handles the case where the resolved level
2266          jumps by more than one.  This is typical for numbers inside a
2267          run of text with left-to-right embedding direction, but can
2268          also happen in other situations.  In those cases the decision
2269          where to continue after a level change, and in what direction,
2270          is tricky.  For example, given a text like below:
2271
2272                   abcdefgh
2273                   11336622
2274
2275          (where the numbers below the text show the resolved levels),
2276          the result of reordering according to UAX#9 should be this:
2277
2278                   efdcghba
2279
2280          This is implemented by the loop below which flips direction
2281          and jumps to the other edge of the level each time it finds
2282          the new level not to be the expected one.  The expected level
2283          is always one more or one less than the previous one.  */
2284       next_level = bidi_peek_at_next_level (bidi_it);
2285       while (next_level != expected_next_level)
2286         {
2287           expected_next_level += incr;
2288           level_to_search += incr;
2289           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2290           bidi_it->scan_dir = -bidi_it->scan_dir;
2291           next_level = bidi_peek_at_next_level (bidi_it);
2292         }
2293
2294       /* Finally, deliver the next character in the new direction.  */
2295       next_level = bidi_level_of_next_char (bidi_it);
2296     }
2297
2298   /* Take note when we have just processed the newline that precedes
2299      the end of the paragraph.  The next time we are about to be
2300      called, set_iterator_to_next will automatically reinit the
2301      paragraph direction, if needed.  We do this at the newline before
2302      the paragraph separator, because the next character might not be
2303      the first character of the next paragraph, due to the bidi
2304      reordering, whereas we _must_ know the paragraph base direction
2305      _before_ we process the paragraph's text, since the base
2306      direction affects the reordering.  */
2307   if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
2308     {
2309       /* The paragraph direction of the entire string, once
2310          determined, is in effect for the entire string.  Setting the
2311          separator limit to the end of the string prevents
2312          bidi_paragraph_init from being called automatically on this
2313          string.  */
2314       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2315         bidi_it->separator_limit = bidi_it->string.schars;
2316       else if (bidi_it->bytepos < ZV_BYTE)
2317         {
2318           EMACS_INT sep_len =
2319             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2320                                    bidi_it->bytepos + bidi_it->ch_len);
2321           if (bidi_it->nchars <= 0)
2322             abort ();
2323           if (sep_len >= 0)
2324             {
2325               bidi_it->new_paragraph = 1;
2326               /* Record the buffer position of the last character of the
2327                  paragraph separator.  */
2328               bidi_it->separator_limit =
2329                 bidi_it->charpos + bidi_it->nchars + sep_len;
2330             }
2331         }
2332     }
2333
2334   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2335     {
2336       /* If we are at paragraph's base embedding level and beyond the
2337          last cached position, the cache's job is done and we can
2338          discard it.  */
2339       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2340           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2341                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2342         bidi_cache_reset ();
2343         /* But as long as we are caching during forward scan, we must
2344            cache each state, or else the cache integrity will be
2345            compromised: it assumes cached states correspond to buffer
2346            positions 1:1.  */
2347       else
2348         bidi_cache_iterator_state (bidi_it, 1);
2349     }
2350
2351   if (STRINGP (bidi_it->string.lstring))
2352     UNGCPRO;
2353 }
2354
2355 /* This is meant to be called from within the debugger, whenever you
2356    wish to examine the cache contents.  */
2357 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2358 void
2359 bidi_dump_cached_states (void)
2360 {
2361   ptrdiff_t i;
2362   int ndigits = 1;
2363
2364   if (bidi_cache_idx == 0)
2365     {
2366       fprintf (stderr, "The cache is empty.\n");
2367       return;
2368     }
2369   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2370            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2371
2372   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2373     ndigits++;
2374   fputs ("ch  ", stderr);
2375   for (i = 0; i < bidi_cache_idx; i++)
2376     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2377   fputs ("\n", stderr);
2378   fputs ("lvl ", stderr);
2379   for (i = 0; i < bidi_cache_idx; i++)
2380     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2381   fputs ("\n", stderr);
2382   fputs ("pos ", stderr);
2383   for (i = 0; i < bidi_cache_idx; i++)
2384     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2385   fputs ("\n", stderr);
2386 }