src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  74 /* Data type for describing the bidirectional character categories.  */
  75 typedef enum {
  76   UNKNOWN_BC,
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static inline bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 abort ();       /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static inline void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static inline bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       EMACS_INT v = XINT (val);
 208
 209       if (v < 0 || v > MAX_CHAR)
 210         abort ();
 211
 212       return v;
 213     }
 214
 215   return c;
 216 }
 217
 218 /* Determine the start-of-run (sor) directional type given the two
 219    embedding levels on either side of the run boundary.  Also, update
 220    the saved info about previously seen characters, since that info is
 221    generally valid for a single level run.  */
 222 static inline void
 223 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 224 {
 225   int higher_level = (level_before > level_after ? level_before : level_after);
 226
 227   /* The prev_was_pdf gork is required for when we have several PDFs
 228      in a row.  In that case, we want to compute the sor type for the
 229      next level run only once: when we see the first PDF.  That's
 230      because the sor type depends only on the higher of the two levels
 231      that we find on the two sides of the level boundary (see UAX#9,
 232      clause X10), and so we don't need to know the final embedding
 233      level to which we descend after processing all the PDFs.  */
 234   if (!bidi_it->prev_was_pdf || level_before < level_after)
 235     /* FIXME: should the default sor direction be user selectable?  */
 236     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 237   if (level_before > level_after)
 238     bidi_it->prev_was_pdf = 1;
 239
 240   bidi_it->prev.type = UNKNOWN_BT;
 241   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 242     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 243   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 244   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 245   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 246   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 247     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 248   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 249 }
 250
 251 /* Push the current embedding level and override status; reset the
 252    current level to LEVEL and the current override status to OVERRIDE.  */
 253 static inline void
 254 bidi_push_embedding_level (struct bidi_it *bidi_it,
 255                            int level, bidi_dir_t override)
 256 {
 257   bidi_it->stack_idx++;
 258   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 259   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 260   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 261 }
 262
 263 /* Pop the embedding level and directional override status from the
 264    stack, and return the new level.  */
 265 static inline int
 266 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 267 {
 268   /* UAX#9 says to ignore invalid PDFs.  */
 269   if (bidi_it->stack_idx > 0)
 270     bidi_it->stack_idx--;
 271   return bidi_it->level_stack[bidi_it->stack_idx].level;
 272 }
 273
 274 /* Record in SAVED_INFO the information about the current character.  */
 275 static inline void
 276 bidi_remember_char (struct bidi_saved_info *saved_info,
 277                     struct bidi_it *bidi_it)
 278 {
 279   saved_info->charpos = bidi_it->charpos;
 280   saved_info->bytepos = bidi_it->bytepos;
 281   saved_info->type = bidi_it->type;
 282   bidi_check_type (bidi_it->type);
 283   saved_info->type_after_w1 = bidi_it->type_after_w1;
 284   bidi_check_type (bidi_it->type_after_w1);
 285   saved_info->orig_type = bidi_it->orig_type;
 286   bidi_check_type (bidi_it->orig_type);
 287 }
 288
 289 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 290    copies the part of the level stack that is actually in use.  */
 291 static inline void
 292 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 293 {
 294   int i;
 295
 296   /* Copy everything except the level stack and beyond.  */
 297   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 298
 299   /* Copy the active part of the level stack.  */
 300   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 301   for (i = 1; i <= from->stack_idx; i++)
 302     to->level_stack[i] = from->level_stack[i];
 303 }
 304
 305 \f
 306 /***********************************************************************
 307                         Caching the bidi iterator states
 308  ***********************************************************************/
 309
 310 #define BIDI_CACHE_CHUNK 200
 311 static struct bidi_it *bidi_cache;
 312 static ptrdiff_t bidi_cache_size = 0;
 313 enum { elsz = sizeof (struct bidi_it) };
 314 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 315 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 316 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 317                                            "stack" level */
 318
 319 /* 5-slot stack for saving the start of the previous level of the
 320    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 321    and we need the same size of our stack.  */
 322 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 323 static int bidi_cache_sp;
 324
 325 /* Size of header used by bidi_shelve_cache.  */
 326 enum
 327   {
 328     bidi_shelve_header_size
 329       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 330          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 331          + sizeof (bidi_cache_last_idx))
 332   };
 333
 334 /* Reset the cache state to the empty state.  We only reset the part
 335    of the cache relevant to iteration of the current object.  Previous
 336    objects, which are pushed on the display iterator's stack, are left
 337    intact.  This is called when the cached information is no more
 338    useful for the current iteration, e.g. when we were reseated to a
 339    new position on the same object.  */
 340 static inline void
 341 bidi_cache_reset (void)
 342 {
 343   bidi_cache_idx = bidi_cache_start;
 344   bidi_cache_last_idx = -1;
 345 }
 346
 347 /* Shrink the cache to its minimal size.  Called when we init the bidi
 348    iterator for reordering a buffer or a string that does not come
 349    from display properties, because that means all the previously
 350    cached info is of no further use.  */
 351 static inline void
 352 bidi_cache_shrink (void)
 353 {
 354   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 355     {
 356       bidi_cache
 357         = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 358       bidi_cache_size = BIDI_CACHE_CHUNK;
 359     }
 360   bidi_cache_reset ();
 361 }
 362
 363 static inline void
 364 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 365 {
 366   int current_scan_dir = bidi_it->scan_dir;
 367
 368   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 369     abort ();
 370
 371   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 372   bidi_it->scan_dir = current_scan_dir;
 373   bidi_cache_last_idx = idx;
 374 }
 375
 376 /* Find a cached state with a given CHARPOS and resolved embedding
 377    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 378    resolved levels in cached states.  DIR, if non-zero, means search
 379    in that direction from the last cache hit.  */
 380 static inline ptrdiff_t
 381 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
 382 {
 383   ptrdiff_t i, i_start;
 384
 385   if (bidi_cache_idx > bidi_cache_start)
 386     {
 387       if (bidi_cache_last_idx == -1)
 388         bidi_cache_last_idx = bidi_cache_idx - 1;
 389       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 390         {
 391           dir = -1;
 392           i_start = bidi_cache_last_idx - 1;
 393         }
 394       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 395                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 396         {
 397           dir = 1;
 398           i_start = bidi_cache_last_idx + 1;
 399         }
 400       else if (dir)
 401         i_start = bidi_cache_last_idx;
 402       else
 403         {
 404           dir = -1;
 405           i_start = bidi_cache_idx - 1;
 406         }
 407
 408       if (dir < 0)
 409         {
 410           /* Linear search for now; FIXME!  */
 411           for (i = i_start; i >= bidi_cache_start; i--)
 412             if (bidi_cache[i].charpos <= charpos
 413                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 414                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 415               return i;
 416         }
 417       else
 418         {
 419           for (i = i_start; i < bidi_cache_idx; i++)
 420             if (bidi_cache[i].charpos <= charpos
 421                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 422                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 423               return i;
 424         }
 425     }
 426
 427   return -1;
 428 }
 429
 430 /* Find a cached state where the resolved level changes to a value
 431    that is lower than LEVEL, and return its cache slot index.  DIR is
 432    the direction to search, starting with the last used cache slot.
 433    If DIR is zero, we search backwards from the last occupied cache
 434    slot.  BEFORE, if non-zero, means return the index of the slot that
 435    is ``before'' the level change in the search direction.  That is,
 436    given the cached levels like this:
 437
 438          1122333442211
 439           AB        C
 440
 441    and assuming we are at the position cached at the slot marked with
 442    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 443    index of slot B or A, depending whether BEFORE is, respectively,
 444    non-zero or zero.  */
 445 static ptrdiff_t
 446 bidi_cache_find_level_change (int level, int dir, int before)
 447 {
 448   if (bidi_cache_idx)
 449     {
 450       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 451       int incr = before ? 1 : 0;
 452
 453       xassert (!dir || bidi_cache_last_idx >= 0);
 454
 455       if (!dir)
 456         dir = -1;
 457       else if (!incr)
 458         i += dir;
 459
 460       if (dir < 0)
 461         {
 462           while (i >= bidi_cache_start + incr)
 463             {
 464               if (bidi_cache[i - incr].resolved_level >= 0
 465                   && bidi_cache[i - incr].resolved_level < level)
 466                 return i;
 467               i--;
 468             }
 469         }
 470       else
 471         {
 472           while (i < bidi_cache_idx - incr)
 473             {
 474               if (bidi_cache[i + incr].resolved_level >= 0
 475                   && bidi_cache[i + incr].resolved_level < level)
 476                 return i;
 477               i++;
 478             }
 479         }
 480     }
 481
 482   return -1;
 483 }
 484
 485 static inline void
 486 bidi_cache_ensure_space (ptrdiff_t idx)
 487 {
 488   /* Enlarge the cache as needed.  */
 489   if (idx >= bidi_cache_size)
 490     {
 491       /* The bidi cache cannot be larger than the largest Lisp string
 492          or buffer.  */
 493       ptrdiff_t string_or_buffer_bound
 494         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 495
 496       /* Also, it cannot be larger than what C can represent.  */
 497       ptrdiff_t c_bound
 498         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 499
 500       bidi_cache
 501         = xpalloc (bidi_cache, &bidi_cache_size,
 502                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 503                    min (string_or_buffer_bound, c_bound), elsz);
 504     }
 505 }
 506
 507 static inline void
 508 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 509 {
 510   ptrdiff_t idx;
 511
 512   /* We should never cache on backward scans.  */
 513   if (bidi_it->scan_dir == -1)
 514     abort ();
 515   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 516
 517   if (idx < 0)
 518     {
 519       idx = bidi_cache_idx;
 520       bidi_cache_ensure_space (idx);
 521       /* Character positions should correspond to cache positions 1:1.
 522          If we are outside the range of cached positions, the cache is
 523          useless and must be reset.  */
 524       if (idx > bidi_cache_start &&
 525           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 526                                + bidi_cache[idx - 1].nchars)
 527            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 528         {
 529           bidi_cache_reset ();
 530           idx = bidi_cache_start;
 531         }
 532       if (bidi_it->nchars <= 0)
 533         abort ();
 534       bidi_copy_it (&bidi_cache[idx], bidi_it);
 535       if (!resolved)
 536         bidi_cache[idx].resolved_level = -1;
 537     }
 538   else
 539     {
 540       /* Copy only the members which could have changed, to avoid
 541          costly copying of the entire struct.  */
 542       bidi_cache[idx].type = bidi_it->type;
 543       bidi_check_type (bidi_it->type);
 544       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 545       bidi_check_type (bidi_it->type_after_w1);
 546       if (resolved)
 547         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 548       else
 549         bidi_cache[idx].resolved_level = -1;
 550       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 551       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 552       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 553       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 554       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 555       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 556       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 557     }
 558
 559   bidi_cache_last_idx = idx;
 560   if (idx >= bidi_cache_idx)
 561     bidi_cache_idx = idx + 1;
 562 }
 563
 564 static inline bidi_type_t
 565 bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
 566 {
 567   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 568
 569   if (i >= bidi_cache_start)
 570     {
 571       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 572
 573       bidi_copy_it (bidi_it, &bidi_cache[i]);
 574       bidi_cache_last_idx = i;
 575       /* Don't let scan direction from from the cached state override
 576          the current scan direction.  */
 577       bidi_it->scan_dir = current_scan_dir;
 578       return bidi_it->type;
 579     }
 580
 581   return UNKNOWN_BT;
 582 }
 583
 584 static inline int
 585 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 586 {
 587   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 588     abort ();
 589   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 590 }
 591
 592 \f
 593 /***********************************************************************
 594              Pushing and popping the bidi iterator state
 595  ***********************************************************************/
 596
 597 /* Push the bidi iterator state in preparation for reordering a
 598    different object, e.g. display string found at certain buffer
 599    position.  Pushing the bidi iterator boils down to saving its
 600    entire state on the cache and starting a new cache "stacked" on top
 601    of the current cache.  */
 602 void
 603 bidi_push_it (struct bidi_it *bidi_it)
 604 {
 605   /* Save the current iterator state in its entirety after the last
 606      used cache slot.  */
 607   bidi_cache_ensure_space (bidi_cache_idx);
 608   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 609
 610   /* Push the current cache start onto the stack.  */
 611   xassert (bidi_cache_sp < IT_STACK_SIZE);
 612   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 613
 614   /* Start a new level of cache, and make it empty.  */
 615   bidi_cache_start = bidi_cache_idx;
 616   bidi_cache_last_idx = -1;
 617 }
 618
 619 /* Restore the iterator state saved by bidi_push_it and return the
 620    cache to the corresponding state.  */
 621 void
 622 bidi_pop_it (struct bidi_it *bidi_it)
 623 {
 624   if (bidi_cache_start <= 0)
 625     abort ();
 626
 627   /* Reset the next free cache slot index to what it was before the
 628      call to bidi_push_it.  */
 629   bidi_cache_idx = bidi_cache_start - 1;
 630
 631   /* Restore the bidi iterator state saved in the cache.  */
 632   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 633
 634   /* Pop the previous cache start from the stack.  */
 635   if (bidi_cache_sp <= 0)
 636     abort ();
 637   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 638
 639   /* Invalidate the last-used cache slot data.  */
 640   bidi_cache_last_idx = -1;
 641 }
 642
 643 static ptrdiff_t bidi_cache_total_alloc;
 644
 645 /* Stash away a copy of the cache and its control variables.  */
 646 void *
 647 bidi_shelve_cache (void)
 648 {
 649   unsigned char *databuf;
 650   ptrdiff_t alloc;
 651
 652   /* Empty cache.  */
 653   if (bidi_cache_idx == 0)
 654     return NULL;
 655
 656   alloc = (bidi_shelve_header_size
 657            + bidi_cache_idx * sizeof (struct bidi_it));
 658   databuf = xmalloc (alloc);
 659   bidi_cache_total_alloc += alloc;
 660
 661   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 662   memcpy (databuf + sizeof (bidi_cache_idx),
 663           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 664   memcpy (databuf + sizeof (bidi_cache_idx)
 665           + bidi_cache_idx * sizeof (struct bidi_it),
 666           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 667   memcpy (databuf + sizeof (bidi_cache_idx)
 668           + bidi_cache_idx * sizeof (struct bidi_it)
 669           + sizeof (bidi_cache_start_stack),
 670           &bidi_cache_sp, sizeof (bidi_cache_sp));
 671   memcpy (databuf + sizeof (bidi_cache_idx)
 672           + bidi_cache_idx * sizeof (struct bidi_it)
 673           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 674           &bidi_cache_start, sizeof (bidi_cache_start));
 675   memcpy (databuf + sizeof (bidi_cache_idx)
 676           + bidi_cache_idx * sizeof (struct bidi_it)
 677           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 678           + sizeof (bidi_cache_start),
 679           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 680
 681   return databuf;
 682 }
 683
 684 /* Restore the cache state from a copy stashed away by
 685    bidi_shelve_cache, and free the buffer used to stash that copy.
 686    JUST_FREE non-zero means free the buffer, but don't restore the
 687    cache; used when the corresponding iterator is discarded instead of
 688    being restored.  */
 689 void
 690 bidi_unshelve_cache (void *databuf, int just_free)
 691 {
 692   unsigned char *p = databuf;
 693
 694   if (!p)
 695     {
 696       if (!just_free)
 697         {
 698           /* A NULL pointer means an empty cache.  */
 699           bidi_cache_start = 0;
 700           bidi_cache_sp = 0;
 701           bidi_cache_reset ();
 702         }
 703     }
 704   else
 705     {
 706       if (just_free)
 707         {
 708           ptrdiff_t idx;
 709
 710           memcpy (&idx, p, sizeof (bidi_cache_idx));
 711           bidi_cache_total_alloc
 712             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 713         }
 714       else
 715         {
 716           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 717           bidi_cache_ensure_space (bidi_cache_idx);
 718           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 719                   bidi_cache_idx * sizeof (struct bidi_it));
 720           memcpy (bidi_cache_start_stack,
 721                   p + sizeof (bidi_cache_idx)
 722                   + bidi_cache_idx * sizeof (struct bidi_it),
 723                   sizeof (bidi_cache_start_stack));
 724           memcpy (&bidi_cache_sp,
 725                   p + sizeof (bidi_cache_idx)
 726                   + bidi_cache_idx * sizeof (struct bidi_it)
 727                   + sizeof (bidi_cache_start_stack),
 728                   sizeof (bidi_cache_sp));
 729           memcpy (&bidi_cache_start,
 730                   p + sizeof (bidi_cache_idx)
 731                   + bidi_cache_idx * sizeof (struct bidi_it)
 732                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 733                   sizeof (bidi_cache_start));
 734           memcpy (&bidi_cache_last_idx,
 735                   p + sizeof (bidi_cache_idx)
 736                   + bidi_cache_idx * sizeof (struct bidi_it)
 737                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 738                   + sizeof (bidi_cache_start),
 739                   sizeof (bidi_cache_last_idx));
 740           bidi_cache_total_alloc
 741             -= (bidi_shelve_header_size
 742                 + bidi_cache_idx * sizeof (struct bidi_it));
 743         }
 744
 745       xfree (p);
 746     }
 747 }
 748
 749 \f
 750 /***********************************************************************
 751                         Initialization
 752  ***********************************************************************/
 753 static void
 754 bidi_initialize (void)
 755 {
 756   bidi_type_table = uniprop_table (intern ("bidi-class"));
 757   if (NILP (bidi_type_table))
 758     abort ();
 759   staticpro (&bidi_type_table);
 760
 761   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 762   if (NILP (bidi_mirror_table))
 763     abort ();
 764   staticpro (&bidi_mirror_table);
 765
 766   Qparagraph_start = intern ("paragraph-start");
 767   staticpro (&Qparagraph_start);
 768   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 769   if (!STRINGP (paragraph_start_re))
 770     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 771   staticpro (&paragraph_start_re);
 772   Qparagraph_separate = intern ("paragraph-separate");
 773   staticpro (&Qparagraph_separate);
 774   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 775   if (!STRINGP (paragraph_separate_re))
 776     paragraph_separate_re = build_string ("[ \t\f]*$");
 777   staticpro (&paragraph_separate_re);
 778
 779   bidi_cache_sp = 0;
 780   bidi_cache_total_alloc = 0;
 781
 782   bidi_initialized = 1;
 783 }
 784
 785 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 786    end.  */
 787 static inline void
 788 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 789 {
 790   bidi_it->invalid_levels = 0;
 791   bidi_it->invalid_rl_levels = -1;
 792   bidi_it->stack_idx = 0;
 793   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 794 }
 795
 796 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 797 void
 798 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, int frame_window_p,
 799               struct bidi_it *bidi_it)
 800 {
 801   if (! bidi_initialized)
 802     bidi_initialize ();
 803   if (charpos >= 0)
 804     bidi_it->charpos = charpos;
 805   if (bytepos >= 0)
 806     bidi_it->bytepos = bytepos;
 807   bidi_it->frame_window_p = frame_window_p;
 808   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 809   bidi_it->first_elt = 1;
 810   bidi_set_paragraph_end (bidi_it);
 811   bidi_it->new_paragraph = 1;
 812   bidi_it->separator_limit = -1;
 813   bidi_it->type = NEUTRAL_B;
 814   bidi_it->type_after_w1 = NEUTRAL_B;
 815   bidi_it->orig_type = NEUTRAL_B;
 816   bidi_it->prev_was_pdf = 0;
 817   bidi_it->prev.type = bidi_it->prev.type_after_w1
 818     = bidi_it->prev.orig_type = UNKNOWN_BT;
 819   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 820     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 821   bidi_it->next_for_neutral.charpos = -1;
 822   bidi_it->next_for_neutral.type
 823     = bidi_it->next_for_neutral.type_after_w1
 824     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 825   bidi_it->prev_for_neutral.charpos = -1;
 826   bidi_it->prev_for_neutral.type
 827     = bidi_it->prev_for_neutral.type_after_w1
 828     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 829   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 830   bidi_it->disp_pos = -1;       /* invalid/unknown */
 831   bidi_it->disp_prop = 0;
 832   /* We can only shrink the cache if we are at the bottom level of its
 833      "stack".  */
 834   if (bidi_cache_start == 0)
 835     bidi_cache_shrink ();
 836   else
 837     bidi_cache_reset ();
 838 }
 839
 840 /* Perform initializations for reordering a new line of bidi text.  */
 841 static void
 842 bidi_line_init (struct bidi_it *bidi_it)
 843 {
 844   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 845   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 846   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 847   bidi_it->invalid_levels = 0;
 848   bidi_it->invalid_rl_levels = -1;
 849   bidi_it->next_en_pos = -1;
 850   bidi_it->next_for_ws.type = UNKNOWN_BT;
 851   bidi_set_sor_type (bidi_it,
 852                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 853                      bidi_it->level_stack[0].level); /* X10 */
 854
 855   bidi_cache_reset ();
 856 }
 857
 858 \f
 859 /***********************************************************************
 860                         Fetching characters
 861  ***********************************************************************/
 862
 863 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 864    are zero-based character positions in S, BEGBYTE is byte position
 865    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 866    string.  */
 867 static inline ptrdiff_t
 868 bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
 869                   const ptrdiff_t begbyte, const ptrdiff_t end, int unibyte)
 870 {
 871   ptrdiff_t pos = beg;
 872   const unsigned char *p = s + begbyte, *start = p;
 873
 874   if (unibyte)
 875     p = s + end;
 876   else
 877     {
 878       if (!CHAR_HEAD_P (*p))
 879         abort ();
 880
 881       while (pos < end)
 882         {
 883           p += BYTES_BY_CHAR_HEAD (*p);
 884           pos++;
 885         }
 886     }
 887
 888   return p - start;
 889 }
 890
 891 /* Fetch and return the character at byte position BYTEPOS.  If S is
 892    non-NULL, fetch the character from string S; otherwise fetch the
 893    character from the current buffer.  UNIBYTE non-zero means S is a
 894    unibyte string.  */
 895 static inline int
 896 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, int unibyte)
 897 {
 898   if (s)
 899     {
 900       if (unibyte)
 901         return s[bytepos];
 902       else
 903         return STRING_CHAR (s + bytepos);
 904     }
 905   else
 906     return FETCH_MULTIBYTE_CHAR (bytepos);
 907 }
 908
 909 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 910    character is covered by a display string, treat the entire run of
 911    covered characters as a single character, either u+2029 or u+FFFC,
 912    and return their combined length in CH_LEN and NCHARS.  DISP_POS
 913    specifies the character position of the next display string, or -1
 914    if not yet computed.  When the next character is at or beyond that
 915    position, the function updates DISP_POS with the position of the
 916    next display string.  DISP_PROP non-zero means that there's really
 917    a display string at DISP_POS, as opposed to when we searched till
 918    DISP_POS without finding one.  If DISP_PROP is 2, it means the
 919    display spec is of the form `(space ...)', which is replaced with
 920    u+2029 to handle it as a paragraph separator.  STRING->s is the C
 921    string to iterate, or NULL if iterating over a buffer or a Lisp
 922    string; in the latter case, STRING->lstring is the Lisp string.  */
 923 static inline int
 924 bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
 925                  int *disp_prop, struct bidi_string_data *string,
 926                  int frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
 927 {
 928   int ch;
 929   ptrdiff_t endpos
 930     = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 931   struct text_pos pos;
 932
 933   /* If we got past the last known position of display string, compute
 934      the position of the next one.  That position could be at CHARPOS.  */
 935   if (charpos < endpos && charpos > *disp_pos)
 936     {
 937       SET_TEXT_POS (pos, charpos, bytepos);
 938       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 939                                               disp_prop);
 940     }
 941
 942   /* Fetch the character at BYTEPOS.  */
 943   if (charpos >= endpos)
 944     {
 945       ch = BIDI_EOB;
 946       *ch_len = 1;
 947       *nchars = 1;
 948       *disp_pos = endpos;
 949       *disp_prop = 0;
 950     }
 951   else if (charpos >= *disp_pos && *disp_prop)
 952     {
 953       ptrdiff_t disp_end_pos;
 954
 955       /* We don't expect to find ourselves in the middle of a display
 956          property.  Hopefully, it will never be needed.  */
 957       if (charpos > *disp_pos)
 958         abort ();
 959       /* Text covered by `display' properties and overlays with
 960          display properties or display strings is handled as a single
 961          character that represents the entire run of characters
 962          covered by the display property.  */
 963       if (*disp_prop == 2)
 964         {
 965           /* `(space ...)' display specs are handled as paragraph
 966              separators for the purposes of the reordering; see UAX#9
 967              section 3 and clause HL1 in section 4.3 there.  */
 968           ch = 0x2029;
 969         }
 970       else
 971         {
 972           /* All other display specs are handled as the Unicode Object
 973              Replacement Character.  */
 974           ch = 0xFFFC;
 975         }
 976       disp_end_pos = compute_display_string_end (*disp_pos, string);
 977       *nchars = disp_end_pos - *disp_pos;
 978       if (*nchars <= 0)
 979         abort ();
 980       if (string->s)
 981         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 982                                     disp_end_pos, string->unibyte);
 983       else if (STRINGP (string->lstring))
 984         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 985                                     bytepos, disp_end_pos, string->unibyte);
 986       else
 987         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 988     }
 989   else
 990     {
 991       if (string->s)
 992         {
 993           int len;
 994
 995           if (!string->unibyte)
 996             {
 997               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
 998               *ch_len = len;
 999             }
1000           else
1001             {
1002               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1003               *ch_len = 1;
1004             }
1005         }
1006       else if (STRINGP (string->lstring))
1007         {
1008           int len;
1009
1010           if (!string->unibyte)
1011             {
1012               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1013                                            len);
1014               *ch_len = len;
1015             }
1016           else
1017             {
1018               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1019               *ch_len = 1;
1020             }
1021         }
1022       else
1023         {
1024           ch = FETCH_MULTIBYTE_CHAR (bytepos);
1025           *ch_len = CHAR_BYTES (ch);
1026         }
1027       *nchars = 1;
1028     }
1029
1030   /* If we just entered a run of characters covered by a display
1031      string, compute the position of the next display string.  */
1032   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1033       && *disp_prop)
1034     {
1035       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1036       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1037                                               disp_prop);
1038     }
1039
1040   return ch;
1041 }
1042
1043 \f
1044 /***********************************************************************
1045                         Determining paragraph direction
1046  ***********************************************************************/
1047
1048 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1049    Value is the non-negative length of the paragraph separator
1050    following the buffer position, -1 if position is at the beginning
1051    of a new paragraph, or -2 if position is neither at beginning nor
1052    at end of a paragraph.  */
1053 static ptrdiff_t
1054 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1055 {
1056   Lisp_Object sep_re;
1057   Lisp_Object start_re;
1058   ptrdiff_t val;
1059
1060   sep_re = paragraph_separate_re;
1061   start_re = paragraph_start_re;
1062
1063   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1064   if (val < 0)
1065     {
1066       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1067         val = -1;
1068       else
1069         val = -2;
1070     }
1071
1072   return val;
1073 }
1074
1075 /* On my 2005-vintage machine, searching back for paragraph start
1076    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
1077    when user types C-p.  The number below limits each call to
1078    bidi_paragraph_init to about 10 ms.  */
1079 #define MAX_PARAGRAPH_SEARCH 7500
1080
1081 /* Find the beginning of this paragraph by looking back in the buffer.
1082    Value is the byte position of the paragraph's beginning, or
1083    BEGV_BYTE if paragraph_start_re is still not found after looking
1084    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1085 static ptrdiff_t
1086 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1087 {
1088   Lisp_Object re = paragraph_start_re;
1089   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1090   ptrdiff_t n = 0;
1091
1092   while (pos_byte > BEGV_BYTE
1093          && n++ < MAX_PARAGRAPH_SEARCH
1094          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1095     {
1096       /* FIXME: What if the paragraph beginning is covered by a
1097          display string?  And what if a display string covering some
1098          of the text over which we scan back includes
1099          paragraph_start_re?  */
1100       pos = find_next_newline_no_quit (pos - 1, -1);
1101       pos_byte = CHAR_TO_BYTE (pos);
1102     }
1103   if (n >= MAX_PARAGRAPH_SEARCH)
1104     pos_byte = BEGV_BYTE;
1105   return pos_byte;
1106 }
1107
1108 /* Determine the base direction, a.k.a. base embedding level, of the
1109    paragraph we are about to iterate through.  If DIR is either L2R or
1110    R2L, just use that.  Otherwise, determine the paragraph direction
1111    from the first strong directional character of the paragraph.
1112
1113    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1114    has no strong directional characters and both DIR and
1115    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
1116    in the buffer until a paragraph is found with a strong character,
1117    or until hitting BEGV.  In the latter case, fall back to L2R.  This
1118    flag is used in current-bidi-paragraph-direction.
1119
1120    Note that this function gives the paragraph separator the same
1121    direction as the preceding paragraph, even though Emacs generally
1122    views the separator as not belonging to any paragraph.  */
1123 void
1124 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
1125 {
1126   ptrdiff_t bytepos = bidi_it->bytepos;
1127   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1128   ptrdiff_t pstartbyte;
1129   /* Note that begbyte is a byte position, while end is a character
1130      position.  Yes, this is ugly, but we are trying to avoid costly
1131      calls to BYTE_TO_CHAR and its ilk.  */
1132   ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
1133   ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
1134
1135   /* Special case for an empty buffer. */
1136   if (bytepos == begbyte && bidi_it->charpos == end)
1137     dir = L2R;
1138   /* We should never be called at EOB or before BEGV.  */
1139   else if (bidi_it->charpos >= end || bytepos < begbyte)
1140     abort ();
1141
1142   if (dir == L2R)
1143     {
1144       bidi_it->paragraph_dir = L2R;
1145       bidi_it->new_paragraph = 0;
1146     }
1147   else if (dir == R2L)
1148     {
1149       bidi_it->paragraph_dir = R2L;
1150       bidi_it->new_paragraph = 0;
1151     }
1152   else if (dir == NEUTRAL_DIR)  /* P2 */
1153     {
1154       int ch;
1155       ptrdiff_t ch_len, nchars;
1156       ptrdiff_t pos, disp_pos = -1;
1157       int disp_prop = 0;
1158       bidi_type_t type;
1159       const unsigned char *s;
1160
1161       if (!bidi_initialized)
1162         bidi_initialize ();
1163
1164       /* If we are inside a paragraph separator, we are just waiting
1165          for the separator to be exhausted; use the previous paragraph
1166          direction.  But don't do that if we have been just reseated,
1167          because we need to reinitialize below in that case.  */
1168       if (!bidi_it->first_elt
1169           && bidi_it->charpos < bidi_it->separator_limit)
1170         return;
1171
1172       /* If we are on a newline, get past it to where the next
1173          paragraph might start.  But don't do that at BEGV since then
1174          we are potentially in a new paragraph that doesn't yet
1175          exist.  */
1176       pos = bidi_it->charpos;
1177       s = (STRINGP (bidi_it->string.lstring)
1178            ? SDATA (bidi_it->string.lstring)
1179            : bidi_it->string.s);
1180       if (bytepos > begbyte
1181           && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1182         {
1183           bytepos++;
1184           pos++;
1185         }
1186
1187       /* We are either at the beginning of a paragraph or in the
1188          middle of it.  Find where this paragraph starts.  */
1189       if (string_p)
1190         {
1191           /* We don't support changes of paragraph direction inside a
1192              string.  It is treated as a single paragraph.  */
1193           pstartbyte = 0;
1194         }
1195       else
1196         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1197       bidi_it->separator_limit = -1;
1198       bidi_it->new_paragraph = 0;
1199
1200       /* The following loop is run more than once only if NO_DEFAULT_P
1201          is non-zero, and only if we are iterating on a buffer.  */
1202       do {
1203         bytepos = pstartbyte;
1204         if (!string_p)
1205           pos = BYTE_TO_CHAR (bytepos);
1206         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
1207                               &bidi_it->string,
1208                               bidi_it->frame_window_p, &ch_len, &nchars);
1209         type = bidi_get_type (ch, NEUTRAL_DIR);
1210
1211         for (pos += nchars, bytepos += ch_len;
1212              (bidi_get_category (type) != STRONG)
1213                || (bidi_ignore_explicit_marks_for_paragraph_level
1214                    && (type == RLE || type == RLO
1215                        || type == LRE || type == LRO));
1216              type = bidi_get_type (ch, NEUTRAL_DIR))
1217           {
1218             if (pos >= end)
1219               {
1220                 /* Pretend there's a paragraph separator at end of
1221                    buffer/string.  */
1222                 type = NEUTRAL_B;
1223                 break;
1224               }
1225             if (!string_p
1226                 && type == NEUTRAL_B
1227                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1228               break;
1229             /* Fetch next character and advance to get past it.  */
1230             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
1231                                   &disp_prop, &bidi_it->string,
1232                                   bidi_it->frame_window_p, &ch_len, &nchars);
1233             pos += nchars;
1234             bytepos += ch_len;
1235           }
1236         if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1237             || (!bidi_ignore_explicit_marks_for_paragraph_level
1238                 && (type == RLO || type == RLE)))
1239           bidi_it->paragraph_dir = R2L;
1240         else if (type == STRONG_L
1241                  || (!bidi_ignore_explicit_marks_for_paragraph_level
1242                      && (type == LRO || type == LRE)))
1243           bidi_it->paragraph_dir = L2R;
1244         if (!string_p
1245             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1246           {
1247             /* If this paragraph is at BEGV, default to L2R.  */
1248             if (pstartbyte == BEGV_BYTE)
1249               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1250             else
1251               {
1252                 ptrdiff_t prevpbyte = pstartbyte;
1253                 ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1254
1255                 /* Find the beginning of the previous paragraph, if any.  */
1256                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1257                   {
1258                     /* FXIME: What if p is covered by a display
1259                        string?  See also a FIXME inside
1260                        bidi_find_paragraph_start.  */
1261                     p--;
1262                     pbyte = CHAR_TO_BYTE (p);
1263                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
1264                   }
1265                 pstartbyte = prevpbyte;
1266               }
1267           }
1268       } while (!string_p
1269                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1270     }
1271   else
1272     abort ();
1273
1274   /* Contrary to UAX#9 clause P3, we only default the paragraph
1275      direction to L2R if we have no previous usable paragraph
1276      direction.  This is allowed by the HL1 clause.  */
1277   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1278     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1279   if (bidi_it->paragraph_dir == R2L)
1280     bidi_it->level_stack[0].level = 1;
1281   else
1282     bidi_it->level_stack[0].level = 0;
1283
1284   bidi_line_init (bidi_it);
1285 }
1286
1287 \f
1288 /***********************************************************************
1289                  Resolving explicit and implicit levels.
1290   The rest of this file constitutes the core of the UBA implementation.
1291  ***********************************************************************/
1292
1293 static inline int
1294 bidi_explicit_dir_char (int ch)
1295 {
1296   bidi_type_t ch_type;
1297
1298   if (!bidi_initialized)
1299     abort ();
1300   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1301   return (ch_type == LRE || ch_type == LRO
1302           || ch_type == RLE || ch_type == RLO
1303           || ch_type == PDF);
1304 }
1305
1306 /* A helper function for bidi_resolve_explicit.  It advances to the
1307    next character in logical order and determines the new embedding
1308    level and directional override, but does not take into account
1309    empty embeddings.  */
1310 static int
1311 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1312 {
1313   int curchar;
1314   bidi_type_t type;
1315   int current_level;
1316   int new_level;
1317   bidi_dir_t override;
1318   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1319
1320   /* If reseat()'ed, don't advance, so as to start iteration from the
1321      position where we were reseated.  bidi_it->bytepos can be less
1322      than BEGV_BYTE after reseat to BEGV.  */
1323   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1324       || bidi_it->first_elt)
1325     {
1326       bidi_it->first_elt = 0;
1327       if (string_p)
1328         {
1329           const unsigned char *p
1330             = (STRINGP (bidi_it->string.lstring)
1331                ? SDATA (bidi_it->string.lstring)
1332                : bidi_it->string.s);
1333
1334           if (bidi_it->charpos < 0)
1335             bidi_it->charpos = 0;
1336           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1337                                                bidi_it->string.unibyte);
1338         }
1339       else
1340         {
1341           if (bidi_it->charpos < BEGV)
1342             bidi_it->charpos = BEGV;
1343           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1344         }
1345     }
1346   /* Don't move at end of buffer/string.  */
1347   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1348     {
1349       /* Advance to the next character, skipping characters covered by
1350          display strings (nchars > 1).  */
1351       if (bidi_it->nchars <= 0)
1352         abort ();
1353       bidi_it->charpos += bidi_it->nchars;
1354       if (bidi_it->ch_len == 0)
1355         abort ();
1356       bidi_it->bytepos += bidi_it->ch_len;
1357     }
1358
1359   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1360   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1361   new_level = current_level;
1362
1363   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1364     {
1365       curchar = BIDI_EOB;
1366       bidi_it->ch_len = 1;
1367       bidi_it->nchars = 1;
1368       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1369       bidi_it->disp_prop = 0;
1370     }
1371   else
1372     {
1373       /* Fetch the character at BYTEPOS.  If it is covered by a
1374          display string, treat the entire run of covered characters as
1375          a single character u+FFFC.  */
1376       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1377                                  &bidi_it->disp_pos, &bidi_it->disp_prop,
1378                                  &bidi_it->string, bidi_it->frame_window_p,
1379                                  &bidi_it->ch_len, &bidi_it->nchars);
1380     }
1381   bidi_it->ch = curchar;
1382
1383   /* Don't apply directional override here, as all the types we handle
1384      below will not be affected by the override anyway, and we need
1385      the original type unaltered.  The override will be applied in
1386      bidi_resolve_weak.  */
1387   type = bidi_get_type (curchar, NEUTRAL_DIR);
1388   bidi_it->orig_type = type;
1389   bidi_check_type (bidi_it->orig_type);
1390
1391   if (type != PDF)
1392     bidi_it->prev_was_pdf = 0;
1393
1394   bidi_it->type_after_w1 = UNKNOWN_BT;
1395
1396   switch (type)
1397     {
1398       case RLE: /* X2 */
1399       case RLO: /* X4 */
1400         bidi_it->type_after_w1 = type;
1401         bidi_check_type (bidi_it->type_after_w1);
1402         type = WEAK_BN; /* X9/Retaining */
1403         if (bidi_it->ignore_bn_limit <= -1)
1404           {
1405             if (current_level <= BIDI_MAXLEVEL - 4)
1406               {
1407                 /* Compute the least odd embedding level greater than
1408                    the current level.  */
1409                 new_level = ((current_level + 1) & ~1) + 1;
1410                 if (bidi_it->type_after_w1 == RLE)
1411                   override = NEUTRAL_DIR;
1412                 else
1413                   override = R2L;
1414                 if (current_level == BIDI_MAXLEVEL - 4)
1415                   bidi_it->invalid_rl_levels = 0;
1416                 bidi_push_embedding_level (bidi_it, new_level, override);
1417               }
1418             else
1419               {
1420                 bidi_it->invalid_levels++;
1421                 /* See the commentary about invalid_rl_levels below.  */
1422                 if (bidi_it->invalid_rl_levels < 0)
1423                   bidi_it->invalid_rl_levels = 0;
1424                 bidi_it->invalid_rl_levels++;
1425               }
1426           }
1427         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1428                  || bidi_it->next_en_pos > bidi_it->charpos)
1429           type = WEAK_EN;
1430         break;
1431       case LRE: /* X3 */
1432       case LRO: /* X5 */
1433         bidi_it->type_after_w1 = type;
1434         bidi_check_type (bidi_it->type_after_w1);
1435         type = WEAK_BN; /* X9/Retaining */
1436         if (bidi_it->ignore_bn_limit <= -1)
1437           {
1438             if (current_level <= BIDI_MAXLEVEL - 5)
1439               {
1440                 /* Compute the least even embedding level greater than
1441                    the current level.  */
1442                 new_level = ((current_level + 2) & ~1);
1443                 if (bidi_it->type_after_w1 == LRE)
1444                   override = NEUTRAL_DIR;
1445                 else
1446                   override = L2R;
1447                 bidi_push_embedding_level (bidi_it, new_level, override);
1448               }
1449             else
1450               {
1451                 bidi_it->invalid_levels++;
1452                 /* invalid_rl_levels counts invalid levels encountered
1453                    while the embedding level was already too high for
1454                    LRE/LRO, but not for RLE/RLO.  That is because
1455                    there may be exactly one PDF which we should not
1456                    ignore even though invalid_levels is non-zero.
1457                    invalid_rl_levels helps to know what PDF is
1458                    that.  */
1459                 if (bidi_it->invalid_rl_levels >= 0)
1460                   bidi_it->invalid_rl_levels++;
1461               }
1462           }
1463         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1464                  || bidi_it->next_en_pos > bidi_it->charpos)
1465           type = WEAK_EN;
1466         break;
1467       case PDF: /* X7 */
1468         bidi_it->type_after_w1 = type;
1469         bidi_check_type (bidi_it->type_after_w1);
1470         type = WEAK_BN; /* X9/Retaining */
1471         if (bidi_it->ignore_bn_limit <= -1)
1472           {
1473             if (!bidi_it->invalid_rl_levels)
1474               {
1475                 new_level = bidi_pop_embedding_level (bidi_it);
1476                 bidi_it->invalid_rl_levels = -1;
1477                 if (bidi_it->invalid_levels)
1478                   bidi_it->invalid_levels--;
1479                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1480               }
1481             if (!bidi_it->invalid_levels)
1482               new_level = bidi_pop_embedding_level (bidi_it);
1483             else
1484               {
1485                 bidi_it->invalid_levels--;
1486                 bidi_it->invalid_rl_levels--;
1487               }
1488           }
1489         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1490                  || bidi_it->next_en_pos > bidi_it->charpos)
1491           type = WEAK_EN;
1492         break;
1493       default:
1494         /* Nothing.  */
1495         break;
1496     }
1497
1498   bidi_it->type = type;
1499   bidi_check_type (bidi_it->type);
1500
1501   return new_level;
1502 }
1503
1504 /* Given an iterator state in BIDI_IT, advance one character position
1505    in the buffer/string to the next character (in the logical order),
1506    resolve any explicit embeddings and directional overrides, and
1507    return the embedding level of the character after resolving
1508    explicit directives and ignoring empty embeddings.  */
1509 static int
1510 bidi_resolve_explicit (struct bidi_it *bidi_it)
1511 {
1512   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1513   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1514   ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1515   const unsigned char *s
1516     = (STRINGP (bidi_it->string.lstring)
1517        ? SDATA (bidi_it->string.lstring)
1518        : bidi_it->string.s);
1519
1520   if (prev_level < new_level
1521       && bidi_it->type == WEAK_BN
1522       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1523       && bidi_it->charpos < eob         /* not already at EOB */
1524       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1525                                                    + bidi_it->ch_len, s,
1526                                                    bidi_it->string.unibyte)))
1527     {
1528       /* Avoid pushing and popping embedding levels if the level run
1529          is empty, as this breaks level runs where it shouldn't.
1530          UAX#9 removes all the explicit embedding and override codes,
1531          so empty embeddings disappear without a trace.  We need to
1532          behave as if we did the same.  */
1533       struct bidi_it saved_it;
1534       int level = prev_level;
1535
1536       bidi_copy_it (&saved_it, bidi_it);
1537
1538       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1539                                                        + bidi_it->ch_len, s,
1540                                                        bidi_it->string.unibyte)))
1541         {
1542           /* This advances to the next character, skipping any
1543              characters covered by display strings.  */
1544           level = bidi_resolve_explicit_1 (bidi_it);
1545           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1546              a pointer to its data is no longer valid.  */
1547           if (STRINGP (bidi_it->string.lstring))
1548             s = SDATA (bidi_it->string.lstring);
1549         }
1550
1551       if (bidi_it->nchars <= 0)
1552         abort ();
1553       if (level == prev_level)  /* empty embedding */
1554         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1555       else                      /* this embedding is non-empty */
1556         saved_it.ignore_bn_limit = -2;
1557
1558       bidi_copy_it (bidi_it, &saved_it);
1559       if (bidi_it->ignore_bn_limit > -1)
1560         {
1561           /* We pushed a level, but we shouldn't have.  Undo that. */
1562           if (!bidi_it->invalid_rl_levels)
1563             {
1564               new_level = bidi_pop_embedding_level (bidi_it);
1565               bidi_it->invalid_rl_levels = -1;
1566               if (bidi_it->invalid_levels)
1567                 bidi_it->invalid_levels--;
1568             }
1569           if (!bidi_it->invalid_levels)
1570             new_level = bidi_pop_embedding_level (bidi_it);
1571           else
1572             {
1573               bidi_it->invalid_levels--;
1574               bidi_it->invalid_rl_levels--;
1575             }
1576         }
1577     }
1578
1579   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1580     {
1581       bidi_set_paragraph_end (bidi_it);
1582       /* This is needed by bidi_resolve_weak below, and in L1.  */
1583       bidi_it->type_after_w1 = bidi_it->type;
1584       bidi_check_type (bidi_it->type_after_w1);
1585     }
1586
1587   return new_level;
1588 }
1589
1590 /* Advance in the buffer/string, resolve weak types and return the
1591    type of the next character after weak type resolution.
        The advance and the explicit embedding/override codes are left
        to bidi_resolve_explicit, which is called first.  On top of
        that, this function applies the directional override (X6) and
        the weak-type clauses W1 through W7 of UAX#9.  The returned type
        is also stored in BIDI_IT->type, and BIDI_IT->type_after_w1, if
        it was not recorded earlier, gets the type reached just before
        W7.  Clauses W4 and W5 need the type of the following character;
        to find it they may scan forward on BIDI_IT itself and then
        restore it from a saved copy.  Aborts if the type left by
        bidi_resolve_explicit is unknown or is one of the explicit
        formatting codes.  */
1592 static bidi_type_t
1593 bidi_resolve_weak (struct bidi_it *bidi_it)
1594 {
1595   bidi_type_t type;
1596   bidi_dir_t override;
       /* Sampled before bidi_resolve_explicit runs, so that a change of
          the embedding level can be detected below.  */
1597   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1598   int new_level  = bidi_resolve_explicit (bidi_it);
1599   int next_char;
1600   bidi_type_t type_of_next;
1601   struct bidi_it saved_it;
       /* End of the text being scanned: string.schars when iterating a
          string, ZV otherwise.  */
1602   ptrdiff_t eob
1603     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1604        ? bidi_it->string.schars : ZV);
1605
1606   type = bidi_it->type;
1607   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1608
       /* None of these types is expected once bidi_resolve_explicit has
          returned; seeing one is treated as a fatal internal error.  */
1609   if (type == UNKNOWN_BT
1610       || type == LRE
1611       || type == LRO
1612       || type == RLE
1613       || type == RLO
1614       || type == PDF)
1615     abort ();
1616
1617   if (new_level != prev_level
1618       || bidi_it->type == NEUTRAL_B)
1619     {
1620       /* We've got a new embedding level run, compute the directional
1621          type of sor and initialize per-run variables (UAX#9, clause
1622          X10).  */
1623       bidi_set_sor_type (bidi_it, prev_level, new_level);
1624     }
1625   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1626            || type == WEAK_BN || type == STRONG_AL)
1627     bidi_it->type_after_w1 = type;      /* needed in L1 */
1628   bidi_check_type (bidi_it->type_after_w1);
1629
1630   /* Level and directional override status are already recorded in
1631      bidi_it, and do not need any change; see X6.  */
1632   if (override == R2L)          /* X6 */
1633     type = STRONG_R;
1634   else if (override == L2R)
1635     type = STRONG_L;
1636   else
1637     {
1638       if (type == WEAK_NSM)     /* W1 */
1639         {
1640           /* Note that we don't need to consider the case where the
1641              prev character has its type overridden by an RLO or LRO,
1642              because then either the type of this NSM would have been
1643              also overridden, or the previous character is outside the
1644              current level run, and thus not relevant to this NSM.
1645              This is why NSM gets the type_after_w1 of the previous
1646              character.  */
1647           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1648               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1649               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1650             type = bidi_it->prev.type_after_w1;
1651           else if (bidi_it->sor == R2L)
1652             type = STRONG_R;
1653           else if (bidi_it->sor == L2R)
1654             type = STRONG_L;
1655           else /* shouldn't happen! */
1656             abort ();
1657         }
           /* Note that this chain is entered with the type W1 has just
              assigned, not with the original NSM type.  */
1658       if (type == WEAK_EN       /* W2 */
1659           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1660         type = WEAK_AN;
1661       else if (type == STRONG_AL) /* W3 */
1662         type = STRONG_R;
1663       else if ((type == WEAK_ES /* W4 */
1664                 && bidi_it->prev.type_after_w1 == WEAK_EN
1665                 && bidi_it->prev.orig_type == WEAK_EN)
1666                || (type == WEAK_CS
1667                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1668                         && bidi_it->prev.orig_type == WEAK_EN)
1669                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1670         {
1671           const unsigned char *s
1672             = (STRINGP (bidi_it->string.lstring)
1673                ? SDATA (bidi_it->string.lstring)
1674                : bidi_it->string.s);
1675
               /* Peek at the following character without moving the
                  iterator; at or past the end of text, use BIDI_EOB.  */
1676           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1677                        ? BIDI_EOB
1678                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1679                                            s, bidi_it->string.unibyte));
1680           type_of_next = bidi_get_type (next_char, override);
1681
               /* A BN or an explicit formatting code does not settle the
                  question: keep advancing the iterator itself for as
                  long as the level stays at new_level and the type is
                  BN, take the type of the character that stopped the
                  scan, then put the iterator back where it was.  */
1682           if (type_of_next == WEAK_BN
1683               || bidi_explicit_dir_char (next_char))
1684             {
1685               bidi_copy_it (&saved_it, bidi_it);
1686               while (bidi_resolve_explicit (bidi_it) == new_level
1687                      && bidi_it->type == WEAK_BN)
1688                 ;
1689               type_of_next = bidi_it->type;
1690               bidi_copy_it (bidi_it, &saved_it);
1691             }
1692
1693           /* If the next character is EN, but the last strong-type
1694              character is AL, that next EN will be changed to AN when
1695              we process it in W2 above.  So in that case, this ES
1696              should not be changed into EN.  */
1697           if (type == WEAK_ES
1698               && type_of_next == WEAK_EN
1699               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1700             type = WEAK_EN;
1701           else if (type == WEAK_CS)
1702             {
1703               if (bidi_it->prev.type_after_w1 == WEAK_AN
1704                   && (type_of_next == WEAK_AN
1705                       /* If the next character is EN, but the last
1706                          strong-type character is AL, EN will be later
1707                          changed to AN when we process it in W2 above.
1708                          So in that case, this CS sits between two ANs
1709                          and should be changed into AN.  */
1710                       || (type_of_next == WEAK_EN
1711                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1712                 type = WEAK_AN;
1713               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1714                        && type_of_next == WEAK_EN
1715                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1716                 type = WEAK_EN;
1717             }
1718         }
1719       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1720                || type == WEAK_BN)      /* W5/Retaining */
1721         {
               /* next_en_pos is the position of an EN found by an earlier
                  look-ahead (see below); while it is still ahead of us,
                  no new scan is needed.  */
1722           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1723               || bidi_it->next_en_pos > bidi_it->charpos)
1724             type = WEAK_EN;
1725           else                  /* W5: ET/BN with EN after it.  */
1726             {
                   /* Position of the candidate EN: the very next
                      character, unless the scan below moves it.  */
1727               ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
1728               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1729                                         ? SDATA (bidi_it->string.lstring)
1730                                         : bidi_it->string.s);
1731
1732               if (bidi_it->nchars <= 0)
1733                 abort ();
1734               next_char
1735                 = (bidi_it->charpos + bidi_it->nchars >= eob
1736                    ? BIDI_EOB
1737                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1738                                        bidi_it->string.unibyte));
1739               type_of_next = bidi_get_type (next_char, override);
1740
                   /* Same look-ahead as in W4, except that ETs are
                      skipped as well, and the position where the scan
                      stopped is remembered in en_pos.  */
1741               if (type_of_next == WEAK_ET
1742                   || type_of_next == WEAK_BN
1743                   || bidi_explicit_dir_char (next_char))
1744                 {
1745                   bidi_copy_it (&saved_it, bidi_it);
1746                   while (bidi_resolve_explicit (bidi_it) == new_level
1747                          && (bidi_it->type == WEAK_BN
1748                              || bidi_it->type == WEAK_ET))
1749                     ;
1750                   type_of_next = bidi_it->type;
1751                   en_pos = bidi_it->charpos;
1752                   bidi_copy_it (bidi_it, &saved_it);
1753                 }
1754               if (type_of_next == WEAK_EN)
1755                 {
1756                   /* If the last strong character is AL, the EN we've
1757                      found will become AN when we get to it (W2). */
1758                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1759                     {
1760                       type = WEAK_EN;
1761                       /* Remember this EN position, to speed up processing
1762                          of the next ETs.  */
1763                       bidi_it->next_en_pos = en_pos;
1764                     }
1765                   else if (type == WEAK_BN)
1766                     type = NEUTRAL_ON; /* W6/Retaining */
1767                 }
1768             }
1769         }
1770     }
1771
       /* Separators and terminators that W4 and W5 left unchanged become
          ON, and so does a BN that directly follows one of them.  */
1772   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1773       || (type == WEAK_BN
1774           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1775               || bidi_it->prev.type_after_w1 == WEAK_ES
1776               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1777     type = NEUTRAL_ON;
1778
1779   /* Store the type we've got so far, before we clobber it with strong
1780      types in W7 and while resolving neutral types.  But leave alone
1781      the original types that were recorded above, because we will need
1782      them for the L1 clause.  */
1783   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1784     bidi_it->type_after_w1 = type;
1785   bidi_check_type (bidi_it->type_after_w1);
1786
       /* EN becomes L if the last strong type was L, or if no strong
          character has been recorded and sor is L2R.  */
1787   if (type == WEAK_EN)  /* W7 */
1788     {
1789       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1790           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1791         type = STRONG_L;
1792     }
1793
1794   bidi_it->type = type;
1795   bidi_check_type (bidi_it->type);
1796   return type;
1797 }
1798
1799 /* Resolve the type of a neutral character according to the type of
1800    surrounding strong text and the current embedding level.
        PREV_TYPE and NEXT_TYPE are the types of the text before and
        after the neutral; EN and AN on either side count as R.  If both
        sides then agree, that common type is returned (N1).  Otherwise
        the result follows the embedding level LEV (N2): STRONG_L when
        LEV is even, STRONG_R when it is odd.  */
1801 static inline bidi_type_t
1802 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1803 {
1804   /* N1: European and Arabic numbers are treated as though they were R.  */
1805   if (next_type == WEAK_EN || next_type == WEAK_AN)
1806     next_type = STRONG_R;
1807   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1808     prev_type = STRONG_R;
1809
1810   if (next_type == prev_type)   /* N1 */
1811     return next_type;
1812   else if ((lev & 1) == 0)      /* N2 */
1813     return STRONG_L;
1814   else
1815     return STRONG_R;
1816 }
1817
     /* Advance to the next character by way of bidi_resolve_weak and, if
        its type is neutral (or it is a BN that did not change the
        embedding level), resolve it to a strong type with
        bidi_resolve_neutral_1.  Any other type is returned unchanged.

        Resolving needs the type of the text that follows the neutral.
        If BIDI_IT->next_for_neutral already holds it, it is used
        directly.  Otherwise the text is scanned forward, caching every
        iterator state on the way, until a non-neutral character, a
        paragraph separator, or the end of the current level run is
        reached; the character found is recorded as next_for_neutral and
        the iterator is restored to the character being resolved.  That
        look-ahead is only possible on a forward scan: the function
        aborts if it is needed while scan_dir is -1.  */
1818 static bidi_type_t
1819 bidi_resolve_neutral (struct bidi_it *bidi_it)
1820 {
       /* The level is sampled on both sides of the call, to tell whether
          the new character changed the embedding level.  */
1821   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1822   bidi_type_t type = bidi_resolve_weak (bidi_it);
1823   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1824
       /* Only these types are expected to come out of weak resolution;
          anything else is a fatal internal error.  */
1825   if (!(type == STRONG_R
1826         || type == STRONG_L
1827         || type == WEAK_BN
1828         || type == WEAK_EN
1829         || type == WEAK_AN
1830         || type == NEUTRAL_B
1831         || type == NEUTRAL_S
1832         || type == NEUTRAL_WS
1833         || type == NEUTRAL_ON))
1834     abort ();
1835
1836   if (bidi_get_category (type) == NEUTRAL
1837       || (type == WEAK_BN && prev_level == current_level))
1838     {
           /* The following non-neutral text is already known: resolve
              right away.  Note that BIDI_IT->type is not updated on
              this path, only the return value carries the result.  */
1839       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1840         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1841                                        bidi_it->next_for_neutral.type,
1842                                        current_level);
1843       else
1844         {
1845           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1846              the assumption of batch-style processing; see clauses W4,
1847              W5, and especially N1, which require to look far forward
1848              (as well as back) in the buffer/string.  May the fleas of
1849              a thousand camels infest the armpits of those who design
1850              supposedly general-purpose algorithms by looking at their
1851              own implementations, and fail to consider other possible
1852              implementations!  */
1853           struct bidi_it saved_it;
1854           bidi_type_t next_type;
1855
1856           if (bidi_it->scan_dir == -1)
1857             abort ();
1858
1859           bidi_copy_it (&saved_it, bidi_it);
1860           /* Scan the text forward until we find the first non-neutral
1861              character, and then use that to resolve the neutral we
1862              are dealing with now.  We also cache the scanned iterator
1863              states, to salvage some of the effort later.  */
1864           bidi_cache_iterator_state (bidi_it, 0);
1865           do {
1866             /* Record the info about the previous character, so that
1867                it will be cached below with this state.  */
1868             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1869                 && bidi_it->type != WEAK_BN)
1870               bidi_remember_char (&bidi_it->prev, bidi_it);
1871             type = bidi_resolve_weak (bidi_it);
1872             /* Paragraph separators have their levels fully resolved
1873                at this point, so cache them as resolved.  */
1874             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1875             /* FIXME: implement L1 here, by testing for a newline and
1876                resetting the level for any sequence of whitespace
1877                characters adjacent to it.  */
1878           } while (!(type == NEUTRAL_B
1879                      || (type != WEAK_BN
1880                          && bidi_get_category (type) != NEUTRAL)
1881                      /* This is all per level run, so stop when we
1882                         reach the end of this level run.  */
1883                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1884                          != current_level)));
1885
               /* Record the character that stopped the scan in the saved
                  state, so that it ends up in BIDI_IT when that state is
                  restored below.  Its type may still be adjusted by the
                  switch.  */
1886           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1887
1888           switch (type)
1889             {
1890               case STRONG_L:
1891               case STRONG_R:
1892               case STRONG_AL:
1893                 next_type = type;
1894                 break;
1895               case WEAK_EN:
1896               case WEAK_AN:
1897                 /* N1: ``European and Arabic numbers are treated as
1898                    though they were R.''  */
1899                 next_type = STRONG_R;
1900                 saved_it.next_for_neutral.type = STRONG_R;
1901                 break;
1902               case WEAK_BN:
1903                 if (!bidi_explicit_dir_char (bidi_it->ch))
1904                   abort ();             /* can't happen: BNs are skipped */
1905                 /* FALLTHROUGH */
1906               case NEUTRAL_B:
1907                 /* Marched all the way to the end of this level run.
1908                    We need to use the eor type, whose information is
1909                    stored by bidi_set_sor_type in the prev_for_neutral
1910                    member.  */
1911                 if (saved_it.type != WEAK_BN
1912                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1913                   {
1914                     next_type = bidi_it->prev_for_neutral.type;
1915                     saved_it.next_for_neutral.type = next_type;
1916                     bidi_check_type (next_type);
1917                   }
1918                 else
1919                   {
1920                     /* This is a BN which does not adjoin neutrals.
1921                        Leave its type alone.  */
1922                     bidi_copy_it (bidi_it, &saved_it);
1923                     return bidi_it->type;
1924                   }
1925                 break;
1926               default:
1927                 abort ();
1928             }
               /* Resolve the character we started from, store the result
                  in the saved state, and restore the iterator from it.  */
1929           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1930                                          next_type, current_level);
1931           saved_it.type = type;
1932           bidi_check_type (type);
1933           bidi_copy_it (bidi_it, &saved_it);
1934         }
1935     }
1936   return type;
1937 }
1938
1939 /* Given an iterator state in BIDI_IT, advance one character position
1940    in the buffer/string to the next character (in the logical order),
1941    resolve the bidi type of that next character, and return that
1942    type.  The resolution itself is done by bidi_resolve_neutral; this
        wrapper only checks the scan direction (it aborts unless scan_dir
        is 1) and updates BIDI_IT->ignore_bn_limit beforehand.  */
1943 static bidi_type_t
1944 bidi_type_of_next_char (struct bidi_it *bidi_it)
1945 {
1946   bidi_type_t type;
1947
1948   /* This should always be called during a forward scan.  */
1949   if (bidi_it->scan_dir != 1)
1950     abort ();
1951
1952   /* Reset the limit until which to ignore BNs if we step out of the
1953      area where we found only empty levels.  That is the case when a
          non-negative limit has been reached by the current position,
          or when the limit is -2 and the current character is not an
          explicit formatting code.
          NOTE(review): what exactly -1 and -2 stand for is decided by
          the code that sets ignore_bn_limit, which is not in this
          function -- confirm there.  */
1954   if ((bidi_it->ignore_bn_limit > -1
1955        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1956       || (bidi_it->ignore_bn_limit == -2
1957           && !bidi_explicit_dir_char (bidi_it->ch)))
1958     bidi_it->ignore_bn_limit = -1;
1959
1960   type = bidi_resolve_neutral (bidi_it);
1961
1962   return type;
1963 }
1964
1965 /* Given an iterator state BIDI_IT, advance one character position in
1966    the buffer/string to the next character (in the current scan
1967    direction), resolve the embedding and implicit levels of that next
1968    character, and return the resulting level.  The level is also
        stored in BIDI_IT->resolved_level.  If the state of the wanted
        character is found in the cache with its level resolved, that
        level is returned at once.  On a backward scan this is the only
        acceptable outcome; otherwise the function aborts.  At the end
        of text on a forward scan, nothing is advanced and the current
        resolved level is returned.  */
1969 static int
1970 bidi_level_of_next_char (struct bidi_it *bidi_it)
1971 {
1972   bidi_type_t type;
       /* prev_level stays at -1 on a backward scan, where it is never
          needed.  next_char_pos starts at -2, which is below every
          position acceptable for a cache lookup (see bob - 1 below).  */
1973   int level, prev_level = -1;
1974   struct bidi_saved_info next_for_neutral;
1975   ptrdiff_t next_char_pos = -2;
1976
1977   if (bidi_it->scan_dir == 1)
1978     {
           /* End of the text being scanned: string.schars when
              iterating a string, ZV otherwise.  */
1979       ptrdiff_t eob
1980         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1981            ? bidi_it->string.schars : ZV);
1982
1983       /* There's no sense in trying to advance if we hit end of text.  */
1984       if (bidi_it->charpos >= eob)
1985         return bidi_it->resolved_level;
1986
1987       /* Record the info about the previous character.  */
1988       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1989           && bidi_it->type != WEAK_BN)
1990         bidi_remember_char (&bidi_it->prev, bidi_it);
1991       if (bidi_it->type_after_w1 == STRONG_R
1992           || bidi_it->type_after_w1 == STRONG_L
1993           || bidi_it->type_after_w1 == STRONG_AL)
1994         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1995       /* FIXME: it sounds like we don't need both prev and
1996          prev_for_neutral members, but I'm leaving them both for now.  */
1997       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1998           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1999         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2000
2001       /* If we overstepped the characters used for resolving neutrals
2002          and whitespace, invalidate their info in the iterator.  */
2003       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2004         bidi_it->next_for_neutral.type = UNKNOWN_BT;
2005       if (bidi_it->next_en_pos >= 0
2006           && bidi_it->charpos >= bidi_it->next_en_pos)
2007         bidi_it->next_en_pos = -1;
2008       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2009           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2010         bidi_it->next_for_ws.type = UNKNOWN_BT;
2011
2012       /* This must be taken before we fill the iterator with the info
2013          about the next char.  If we scan backwards, the iterator
2014          state must be already cached, so there's no need to know the
2015          embedding level of the previous character, since we will be
2016          returning to our caller shortly.  */
2017       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2018     }
       /* Keep a copy: a state fetched from the cache below may lack this
          information.  */
2019   next_for_neutral = bidi_it->next_for_neutral;
2020
2021   /* Perhaps the character we want is already cached.  If it is, the
2022      call to bidi_cache_find below will return a type other than
2023      UNKNOWN_BT.  */
2024   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2025     {
           /* Lowest character position: 0 for strings, 1 for buffers.  */
2026       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2027                  ? 0 : 1);
2028       if (bidi_it->scan_dir > 0)
2029         {
2030           if (bidi_it->nchars <= 0)
2031             abort ();
2032           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2033         }
2034       else if (bidi_it->charpos >= bob)
2035         /* Implementation note: we allow next_char_pos to be as low as
2036            0 for buffers or -1 for strings, and that is okay because
2037            that's the "position" of the sentinel iterator state we
2038            cached at the beginning of the iteration.  */
2039         next_char_pos = bidi_it->charpos - 1;
2040       if (next_char_pos >= bob - 1)
2041         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2042       else
2043         type = UNKNOWN_BT;
2044     }
2045   else
2046     type = UNKNOWN_BT;
2047   if (type != UNKNOWN_BT)
2048     {
2049       /* Don't lose the information for resolving neutrals!  The
2050          cached states could have been cached before their
2051          next_for_neutral member was computed.  If we are on our way
2052          forward, we can simply take the info from the previous
2053          state.  */
2054       if (bidi_it->scan_dir == 1
2055           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2056         bidi_it->next_for_neutral = next_for_neutral;
2057
2058       /* If resolved_level is -1, it means this state was cached
2059          before it was completely resolved, so we cannot return
2060          it.  */
2061       if (bidi_it->resolved_level != -1)
2062         return bidi_it->resolved_level;
2063     }
2064   if (bidi_it->scan_dir == -1)
2065     /* If we are going backwards, the iterator state is already cached
2066        from previous scans, and should be fully resolved.  */
2067     abort ();
2068
       /* Nothing usable in the cache: advance and resolve the type the
          hard way.  A type that did come from the cache, with the level
          still unresolved, is kept and finished below.  */
2069   if (type == UNKNOWN_BT)
2070     type = bidi_type_of_next_char (bidi_it);
2071
2072   if (type == NEUTRAL_B)
2073     return bidi_it->resolved_level;
2074
2075   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2076   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2077       || (type == WEAK_BN && prev_level == level))
2078     {
2079       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2080         abort ();
2081
2082       /* If the cached state shows a neutral character, it was not
2083          resolved by bidi_resolve_neutral, so do it now.  */
2084       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2085                                      bidi_it->next_for_neutral.type,
2086                                      level);
2087     }
2088
       /* By now the type must be one of those that the implicit-level
          rules below can deal with.  */
2089   if (!(type == STRONG_R
2090         || type == STRONG_L
2091         || type == WEAK_BN
2092         || type == WEAK_EN
2093         || type == WEAK_AN))
2094     abort ();
2095   bidi_it->type = type;
2096   bidi_check_type (bidi_it->type);
2097
2098   /* For L1 below, we need to know, for each WS character, whether
2099      it belongs to a sequence of WS characters preceding a newline
2100      or a TAB or a paragraph separator.  */
2101   if (bidi_it->orig_type == NEUTRAL_WS
2102       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2103     {
           /* The look-ahead below works on local copies of the position
              data, so the iterator itself does not move.  */
2104       int ch;
2105       ptrdiff_t clen = bidi_it->ch_len;
2106       ptrdiff_t bpos = bidi_it->bytepos;
2107       ptrdiff_t cpos = bidi_it->charpos;
2108       ptrdiff_t disp_pos = bidi_it->disp_pos;
2109       ptrdiff_t nc = bidi_it->nchars;
2110       struct bidi_string_data bs = bidi_it->string;
2111       bidi_type_t chtype;
2112       int fwp = bidi_it->frame_window_p;
2113       int dpp = bidi_it->disp_prop;
2114
2115       if (bidi_it->nchars <= 0)
2116         abort ();
           /* Skip forward over whitespace, BNs and explicit formatting
              codes; a newline or the end of text counts as a paragraph
              separator here.  */
2117       do {
2118         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2119                               fwp, &clen, &nc);
2120         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
2121           chtype = NEUTRAL_B;
2122         else
2123           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2124       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2125                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2126       bidi_it->next_for_ws.type = chtype;
2127       bidi_check_type (bidi_it->next_for_ws.type);
2128       bidi_it->next_for_ws.charpos = cpos;
2129       bidi_it->next_for_ws.bytepos = bpos;
2130     }
2131
2132   /* Resolve implicit levels, with a twist: PDFs get the embedding
2133      level of the embedding they terminate.  See below for the
2134      reason.  */
2135   if (bidi_it->orig_type == PDF
2136       /* Don't do this if this formatting code didn't change the
2137          embedding level due to invalid or empty embeddings.  */
2138       && prev_level != level)
2139     {
2140       /* Don't look in UAX#9 for the reason for this: it's our own
2141          private quirk.  The reason is that we want the formatting
2142          codes to be delivered so that they bracket the text of their
2143          embedding.  For example, given the text
2144
2145              {RLO}teST{PDF}
2146
2147          we want it to be displayed as
2148
2149              {PDF}STet{RLO}
2150
2151          not as
2152
2153              STet{RLO}{PDF}
2154
2155          which will result because we bump up the embedding level as
2156          soon as we see the RLO and pop it as soon as we see the PDF,
2157          so RLO itself has the same embedding level as "teST", and
2158          thus would be normally delivered last, just before the PDF.
2159          The assignment below fiddles with the level of PDF so that
2160          this ugly side effect does not happen.
2161
2162          (This is, of course, only important if the formatting codes
2163          are actually displayed, but Emacs does need to display them
2164          if the user wants to.)  */
2165       level = prev_level;
2166     }
       /* L1: paragraph and segment separators, newlines, end of text,
          and whitespace that runs up to a separator, all get the level
          stored at the bottom of the level stack.  */
2167   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2168            || bidi_it->orig_type == NEUTRAL_S
2169            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2170            /* || bidi_it->ch == LINESEP_CHAR */
2171            || (bidi_it->orig_type == NEUTRAL_WS
2172                && (bidi_it->next_for_ws.type == NEUTRAL_B
2173                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2174     level = bidi_it->level_stack[0].level;
       /* Even embedding level: R goes up one level, numbers go up two.  */
2175   else if ((level & 1) == 0) /* I1 */
2176     {
2177       if (type == STRONG_R)
2178         level++;
2179       else if (type == WEAK_EN || type == WEAK_AN)
2180         level += 2;
2181     }
       /* Odd embedding level: L and numbers go up one level.  */
2182   else                  /* I2 */
2183     {
2184       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2185         level++;
2186     }
2187
2188   bidi_it->resolved_level = level;
2189   return level;
2190 }
2191
2192 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2193    non-zero, we are at the end of a level, and we need to prepare to
2194    resume the scan of the lower level.
2195
2196    If this level's other edge is cached, we simply jump to it, filling
2197    the iterator structure with the iterator state on the other edge.
2198    Otherwise, we walk the buffer or string until we come back to the
2199    same level as LEVEL.
2200
2201    Note: we are not talking here about a ``level run'' in the UAX#9
2202    sense of the term, but rather about a ``level'' which includes
2203    all the levels higher than it.  In other words, given the levels
2204    like this:
2205
2206          11111112222222333333334443343222222111111112223322111
2207                 A      B                    C
2208
2209    and assuming we are at point A scanning left to right, this
2210    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2211    at point B.
        BIDI_IT is updated in place and nothing is returned.  The
        function aborts if END_FLAG is non-zero and the edge is not in
        the cache.  */
2212 static void
2213 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2214 {
       /* At the end of a level the edge is looked up against the current
          scan direction, otherwise along it.  */
2215   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2216   ptrdiff_t idx;
2217
2218   /* Try the cache first.  */
2219   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2220       >= bidi_cache_start)
2221     bidi_cache_fetch_state (idx, bidi_it);
2222   else
2223     {
2224       int new_level;
2225
2226       if (end_flag)
2227         abort (); /* if we are at end of level, its edges must be cached */
2228
           /* Walk on, caching every state on the way, until a character
              whose resolved level is below LEVEL turns up; the iterator
              is left on that character.  */
2229       bidi_cache_iterator_state (bidi_it, 1);
2230       do {
2231         new_level = bidi_level_of_next_char (bidi_it);
2232         bidi_cache_iterator_state (bidi_it, 1);
2233       } while (new_level >= level);
2234     }
2235 }
2236
2237 void
2238 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2239 {
2240   int old_level, new_level, next_level;
2241   struct bidi_it sentinel;
2242   struct gcpro gcpro1;
2243
2244   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2245     abort ();
2246
2247   if (bidi_it->scan_dir == 0)
2248     {
2249       bidi_it->scan_dir = 1;    /* default to logical order */
2250     }
2251
2252   /* The code below can call eval, and thus cause GC.  If we are
2253      iterating a Lisp string, make sure it won't be GCed.  */
2254   if (STRINGP (bidi_it->string.lstring))
2255     GCPRO1 (bidi_it->string.lstring);
2256
2257   /* If we just passed a newline, initialize for the next line.  */
2258   if (!bidi_it->first_elt
2259       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2260     bidi_line_init (bidi_it);
2261
2262   /* Prepare the sentinel iterator state, and cache it.  When we bump
2263      into it, scanning backwards, we'll know that the last non-base
2264      level is exhausted.  */
2265   if (bidi_cache_idx == bidi_cache_start)
2266     {
2267       bidi_copy_it (&sentinel, bidi_it);
2268       if (bidi_it->first_elt)
2269         {
2270           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2271           sentinel.bytepos--;
2272           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2273           sentinel.ch_len = 1;
2274           sentinel.nchars = 1;
2275         }
2276       bidi_cache_iterator_state (&sentinel, 1);
2277     }
2278
2279   old_level = bidi_it->resolved_level;
2280   new_level = bidi_level_of_next_char (bidi_it);
2281
2282   /* Reordering of resolved levels (clause L2) is implemented by
2283      jumping to the other edge of the level and flipping direction of
2284      scanning the text whenever we find a level change.  */
2285   if (new_level != old_level)
2286     {
2287       int ascending = new_level > old_level;
2288       int level_to_search = ascending ? old_level + 1 : old_level;
2289       int incr = ascending ? 1 : -1;
2290       int expected_next_level = old_level + incr;
2291
2292       /* Jump (or walk) to the other edge of this level.  */
2293       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2294       /* Switch scan direction and peek at the next character in the
2295          new direction.  */
2296       bidi_it->scan_dir = -bidi_it->scan_dir;
2297
2298       /* The following loop handles the case where the resolved level
2299          jumps by more than one.  This is typical for numbers inside a
2300          run of text with left-to-right embedding direction, but can
2301          also happen in other situations.  In those cases the decision
2302          where to continue after a level change, and in what direction,
2303          is tricky.  For example, given a text like below:
2304
2305                   abcdefgh
2306                   11336622
2307
2308          (where the numbers below the text show the resolved levels),
2309          the result of reordering according to UAX#9 should be this:
2310
2311                   efdcghba
2312
2313          This is implemented by the loop below which flips direction
2314          and jumps to the other edge of the level each time it finds
2315          the new level not to be the expected one.  The expected level
2316          is always one more or one less than the previous one.  */
2317       next_level = bidi_peek_at_next_level (bidi_it);
2318       while (next_level != expected_next_level)
2319         {
2320           expected_next_level += incr;
2321           level_to_search += incr;
2322           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2323           bidi_it->scan_dir = -bidi_it->scan_dir;
2324           next_level = bidi_peek_at_next_level (bidi_it);
2325         }
2326
2327       /* Finally, deliver the next character in the new direction.  */
2328       next_level = bidi_level_of_next_char (bidi_it);
2329     }
2330
2331   /* Take note when we have just processed the newline that precedes
2332      the end of the paragraph.  The next time we are about to be
2333      called, set_iterator_to_next will automatically reinit the
2334      paragraph direction, if needed.  We do this at the newline before
2335      the paragraph separator, because the next character might not be
2336      the first character of the next paragraph, due to the bidi
2337      reordering, whereas we _must_ know the paragraph base direction
2338      _before_ we process the paragraph's text, since the base
2339      direction affects the reordering.  */
2340   if (bidi_it->scan_dir == 1
2341       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2342     {
2343       /* The paragraph direction of the entire string, once
2344          determined, is in effect for the entire string.  Setting the
2345          separator limit to the end of the string prevents
2346          bidi_paragraph_init from being called automatically on this
2347          string.  */
2348       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2349         bidi_it->separator_limit = bidi_it->string.schars;
2350       else if (bidi_it->bytepos < ZV_BYTE)
2351         {
2352           ptrdiff_t sep_len
2353             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2354                                      bidi_it->bytepos + bidi_it->ch_len);
2355           if (bidi_it->nchars <= 0)
2356             abort ();
2357           if (sep_len >= 0)
2358             {
2359               bidi_it->new_paragraph = 1;
2360               /* Record the buffer position of the last character of the
2361                  paragraph separator.  */
2362               bidi_it->separator_limit
2363                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2364             }
2365         }
2366     }
2367
2368   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2369     {
2370       /* If we are at paragraph's base embedding level and beyond the
2371          last cached position, the cache's job is done and we can
2372          discard it.  */
2373       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2374           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2375                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2376         bidi_cache_reset ();
2377         /* But as long as we are caching during forward scan, we must
2378            cache each state, or else the cache integrity will be
2379            compromised: it assumes cached states correspond to buffer
2380            positions 1:1.  */
2381       else
2382         bidi_cache_iterator_state (bidi_it, 1);
2383     }
2384
2385   if (STRINGP (bidi_it->string.lstring))
2386     UNGCPRO;
2387 }
2388
2389 /* This is meant to be called from within the debugger, whenever you
2390    wish to examine the cache contents.  */
2391 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2392 void
2393 bidi_dump_cached_states (void)
2394 {
2395   ptrdiff_t i;
2396   int ndigits = 1;
2397
2398   if (bidi_cache_idx == 0)
2399     {
2400       fprintf (stderr, "The cache is empty.\n");
2401       return;
2402     }
2403   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2404            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2405
2406   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2407     ndigits++;
2408   fputs ("ch  ", stderr);
2409   for (i = 0; i < bidi_cache_idx; i++)
2410     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2411   fputs ("\n", stderr);
2412   fputs ("lvl ", stderr);
2413   for (i = 0; i < bidi_cache_idx; i++)
2414     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2415   fputs ("\n", stderr);
2416   fputs ("pos ", stderr);
2417   for (i = 0; i < bidi_cache_idx; i++)
2418     fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
2419   fputs ("\n", stderr);
2420 }