src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
   54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  74 /* Data type for describing the bidirectional character categories.  */
  75 typedef enum {
  76   UNKNOWN_BC,
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static inline bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 abort ();       /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static inline void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static inline bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       int v = XINT (val);
 208
 209       if (v < 0 || v > MAX_CHAR)
 210         abort ();
 211
 212       return v;
 213     }
 214
 215   return c;
 216 }
 217
 218 /* Determine the start-of-run (sor) directional type given the two
 219    embedding levels on either side of the run boundary.  Also, update
 220    the saved info about previously seen characters, since that info is
 221    generally valid for a single level run.  */
 222 static inline void
 223 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 224 {
 225   int higher_level = (level_before > level_after ? level_before : level_after);
 226
 227   /* The prev_was_pdf gork is required for when we have several PDFs
 228      in a row.  In that case, we want to compute the sor type for the
 229      next level run only once: when we see the first PDF.  That's
 230      because the sor type depends only on the higher of the two levels
 231      that we find on the two sides of the level boundary (see UAX#9,
 232      clause X10), and so we don't need to know the final embedding
 233      level to which we descend after processing all the PDFs.  */
 234   if (!bidi_it->prev_was_pdf || level_before < level_after)
 235     /* FIXME: should the default sor direction be user selectable?  */
 236     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 237   if (level_before > level_after)
 238     bidi_it->prev_was_pdf = 1;
 239
 240   bidi_it->prev.type = UNKNOWN_BT;
 241   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 242     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 243   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 244   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 245   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 246   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 247     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 248   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 249 }
 250
 251 /* Push the current embedding level and override status; reset the
 252    current level to LEVEL and the current override status to OVERRIDE.  */
 253 static inline void
 254 bidi_push_embedding_level (struct bidi_it *bidi_it,
 255                            int level, bidi_dir_t override)
 256 {
 257   bidi_it->stack_idx++;
 258   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 259   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 260   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 261 }
 262
 263 /* Pop the embedding level and directional override status from the
 264    stack, and return the new level.  */
 265 static inline int
 266 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 267 {
 268   /* UAX#9 says to ignore invalid PDFs.  */
 269   if (bidi_it->stack_idx > 0)
 270     bidi_it->stack_idx--;
 271   return bidi_it->level_stack[bidi_it->stack_idx].level;
 272 }
 273
 274 /* Record in SAVED_INFO the information about the current character.  */
 275 static inline void
 276 bidi_remember_char (struct bidi_saved_info *saved_info,
 277                     struct bidi_it *bidi_it)
 278 {
 279   saved_info->charpos = bidi_it->charpos;
 280   saved_info->bytepos = bidi_it->bytepos;
 281   saved_info->type = bidi_it->type;
 282   bidi_check_type (bidi_it->type);
 283   saved_info->type_after_w1 = bidi_it->type_after_w1;
 284   bidi_check_type (bidi_it->type_after_w1);
 285   saved_info->orig_type = bidi_it->orig_type;
 286   bidi_check_type (bidi_it->orig_type);
 287 }
 288
 289 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 290    copies the part of the level stack that is actually in use.  */
 291 static inline void
 292 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 293 {
 294   int i;
 295
 296   /* Copy everything except the level stack and beyond.  */
 297   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 298
 299   /* Copy the active part of the level stack.  */
 300   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 301   for (i = 1; i <= from->stack_idx; i++)
 302     to->level_stack[i] = from->level_stack[i];
 303 }
 304
 305 \f
 306 /***********************************************************************
 307                         Caching the bidi iterator states
 308  ***********************************************************************/
 309
 310 #define BIDI_CACHE_CHUNK 200
 311 static struct bidi_it *bidi_cache;
 312 static ptrdiff_t bidi_cache_size = 0;
 313 enum { elsz = sizeof (struct bidi_it) };
 314 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 315 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 316 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 317                                            "stack" level */
 318
 319 /* 5-slot stack for saving the start of the previous level of the
 320    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 321    and we need the same size of our stack.  */
 322 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 323 static int bidi_cache_sp;
 324
 325 /* Size of header used by bidi_shelve_cache.  */
 326 enum
 327   {
 328     bidi_shelve_header_size
 329       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 330          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 331          + sizeof (bidi_cache_last_idx))
 332   };
 333
 334 /* Reset the cache state to the empty state.  We only reset the part
 335    of the cache relevant to iteration of the current object.  Previous
 336    objects, which are pushed on the display iterator's stack, are left
 337    intact.  This is called when the cached information is no more
 338    useful for the current iteration, e.g. when we were reseated to a
 339    new position on the same object.  */
 340 static inline void
 341 bidi_cache_reset (void)
 342 {
 343   bidi_cache_idx = bidi_cache_start;
 344   bidi_cache_last_idx = -1;
 345 }
 346
 347 /* Shrink the cache to its minimal size.  Called when we init the bidi
 348    iterator for reordering a buffer or a string that does not come
 349    from display properties, because that means all the previously
 350    cached info is of no further use.  */
 351 static inline void
 352 bidi_cache_shrink (void)
 353 {
 354   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 355     {
 356       bidi_cache
 357         = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 358       bidi_cache_size = BIDI_CACHE_CHUNK;
 359     }
 360   bidi_cache_reset ();
 361 }
 362
 363 static inline void
 364 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 365 {
 366   int current_scan_dir = bidi_it->scan_dir;
 367
 368   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 369     abort ();
 370
 371   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 372   bidi_it->scan_dir = current_scan_dir;
 373   bidi_cache_last_idx = idx;
 374 }
 375
 376 /* Find a cached state with a given CHARPOS and resolved embedding
 377    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 378    resolved levels in cached states.  DIR, if non-zero, means search
 379    in that direction from the last cache hit.  */
 380 static inline ptrdiff_t
 381 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 382 {
 383   ptrdiff_t i, i_start;
 384
 385   if (bidi_cache_idx > bidi_cache_start)
 386     {
 387       if (bidi_cache_last_idx == -1)
 388         bidi_cache_last_idx = bidi_cache_idx - 1;
 389       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 390         {
 391           dir = -1;
 392           i_start = bidi_cache_last_idx - 1;
 393         }
 394       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 395                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 396         {
 397           dir = 1;
 398           i_start = bidi_cache_last_idx + 1;
 399         }
 400       else if (dir)
 401         i_start = bidi_cache_last_idx;
 402       else
 403         {
 404           dir = -1;
 405           i_start = bidi_cache_idx - 1;
 406         }
 407
 408       if (dir < 0)
 409         {
 410           /* Linear search for now; FIXME!  */
 411           for (i = i_start; i >= bidi_cache_start; i--)
 412             if (bidi_cache[i].charpos <= charpos
 413                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 414                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 415               return i;
 416         }
 417       else
 418         {
 419           for (i = i_start; i < bidi_cache_idx; i++)
 420             if (bidi_cache[i].charpos <= charpos
 421                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 422                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 423               return i;
 424         }
 425     }
 426
 427   return -1;
 428 }
 429
 430 /* Find a cached state where the resolved level changes to a value
 431    that is lower than LEVEL, and return its cache slot index.  DIR is
 432    the direction to search, starting with the last used cache slot.
 433    If DIR is zero, we search backwards from the last occupied cache
 434    slot.  BEFORE, if non-zero, means return the index of the slot that
 435    is ``before'' the level change in the search direction.  That is,
 436    given the cached levels like this:
 437
 438          1122333442211
 439           AB        C
 440
 441    and assuming we are at the position cached at the slot marked with
 442    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 443    index of slot B or A, depending whether BEFORE is, respectively,
 444    non-zero or zero.  */
 445 static ptrdiff_t
 446 bidi_cache_find_level_change (int level, int dir, int before)
 447 {
 448   if (bidi_cache_idx)
 449     {
 450       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 451       int incr = before ? 1 : 0;
 452
 453       xassert (!dir || bidi_cache_last_idx >= 0);
 454
 455       if (!dir)
 456         dir = -1;
 457       else if (!incr)
 458         i += dir;
 459
 460       if (dir < 0)
 461         {
 462           while (i >= bidi_cache_start + incr)
 463             {
 464               if (bidi_cache[i - incr].resolved_level >= 0
 465                   && bidi_cache[i - incr].resolved_level < level)
 466                 return i;
 467               i--;
 468             }
 469         }
 470       else
 471         {
 472           while (i < bidi_cache_idx - incr)
 473             {
 474               if (bidi_cache[i + incr].resolved_level >= 0
 475                   && bidi_cache[i + incr].resolved_level < level)
 476                 return i;
 477               i++;
 478             }
 479         }
 480     }
 481
 482   return -1;
 483 }
 484
 485 static inline void
 486 bidi_cache_ensure_space (ptrdiff_t idx)
 487 {
 488   /* Enlarge the cache as needed.  */
 489   if (idx >= bidi_cache_size)
 490     {
 491       /* The bidi cache cannot be larger than the largest Lisp string
 492          or buffer.  */
 493       ptrdiff_t string_or_buffer_bound
 494         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 495
 496       /* Also, it cannot be larger than what C can represent.  */
 497       ptrdiff_t c_bound
 498         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 499
 500       bidi_cache
 501         = xpalloc (bidi_cache, &bidi_cache_size,
 502                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 503                    min (string_or_buffer_bound, c_bound), elsz);
 504     }
 505 }
 506
 507 static inline void
 508 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 509 {
 510   ptrdiff_t idx;
 511
 512   /* We should never cache on backward scans.  */
 513   if (bidi_it->scan_dir == -1)
 514     abort ();
 515   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 516
 517   if (idx < 0)
 518     {
 519       idx = bidi_cache_idx;
 520       bidi_cache_ensure_space (idx);
 521       /* Character positions should correspond to cache positions 1:1.
 522          If we are outside the range of cached positions, the cache is
 523          useless and must be reset.  */
 524       if (idx > bidi_cache_start &&
 525           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 526                                + bidi_cache[idx - 1].nchars)
 527            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 528         {
 529           bidi_cache_reset ();
 530           idx = bidi_cache_start;
 531         }
 532       if (bidi_it->nchars <= 0)
 533         abort ();
 534       bidi_copy_it (&bidi_cache[idx], bidi_it);
 535       if (!resolved)
 536         bidi_cache[idx].resolved_level = -1;
 537     }
 538   else
 539     {
 540       /* Copy only the members which could have changed, to avoid
 541          costly copying of the entire struct.  */
 542       bidi_cache[idx].type = bidi_it->type;
 543       bidi_check_type (bidi_it->type);
 544       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 545       bidi_check_type (bidi_it->type_after_w1);
 546       if (resolved)
 547         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 548       else
 549         bidi_cache[idx].resolved_level = -1;
 550       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 551       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 552       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 553       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 554       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 555       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 556       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 557     }
 558
 559   bidi_cache_last_idx = idx;
 560   if (idx >= bidi_cache_idx)
 561     bidi_cache_idx = idx + 1;
 562 }
 563
 564 static inline bidi_type_t
 565 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 566 {
 567   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 568
 569   if (i >= bidi_cache_start)
 570     {
 571       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 572
 573       bidi_copy_it (bidi_it, &bidi_cache[i]);
 574       bidi_cache_last_idx = i;
 575       /* Don't let scan direction from from the cached state override
 576          the current scan direction.  */
 577       bidi_it->scan_dir = current_scan_dir;
 578       return bidi_it->type;
 579     }
 580
 581   return UNKNOWN_BT;
 582 }
 583
 584 static inline int
 585 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 586 {
 587   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 588     abort ();
 589   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 590 }
 591
 592 \f
 593 /***********************************************************************
 594              Pushing and popping the bidi iterator state
 595  ***********************************************************************/
 596
 597 /* Push the bidi iterator state in preparation for reordering a
 598    different object, e.g. display string found at certain buffer
 599    position.  Pushing the bidi iterator boils down to saving its
 600    entire state on the cache and starting a new cache "stacked" on top
 601    of the current cache.  */
 602 void
 603 bidi_push_it (struct bidi_it *bidi_it)
 604 {
 605   /* Save the current iterator state in its entirety after the last
 606      used cache slot.  */
 607   bidi_cache_ensure_space (bidi_cache_idx);
 608   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 609
 610   /* Push the current cache start onto the stack.  */
 611   xassert (bidi_cache_sp < IT_STACK_SIZE);
 612   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 613
 614   /* Start a new level of cache, and make it empty.  */
 615   bidi_cache_start = bidi_cache_idx;
 616   bidi_cache_last_idx = -1;
 617 }
 618
 619 /* Restore the iterator state saved by bidi_push_it and return the
 620    cache to the corresponding state.  */
 621 void
 622 bidi_pop_it (struct bidi_it *bidi_it)
 623 {
 624   if (bidi_cache_start <= 0)
 625     abort ();
 626
 627   /* Reset the next free cache slot index to what it was before the
 628      call to bidi_push_it.  */
 629   bidi_cache_idx = bidi_cache_start - 1;
 630
 631   /* Restore the bidi iterator state saved in the cache.  */
 632   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 633
 634   /* Pop the previous cache start from the stack.  */
 635   if (bidi_cache_sp <= 0)
 636     abort ();
 637   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 638
 639   /* Invalidate the last-used cache slot data.  */
 640   bidi_cache_last_idx = -1;
 641 }
 642
 643 static ptrdiff_t bidi_cache_total_alloc;
 644
 645 /* Stash away a copy of the cache and its control variables.  */
 646 void *
 647 bidi_shelve_cache (void)
 648 {
 649   unsigned char *databuf;
 650   ptrdiff_t alloc;
 651
 652   /* Empty cache.  */
 653   if (bidi_cache_idx == 0)
 654     return NULL;
 655
 656   alloc = (bidi_shelve_header_size
 657            + bidi_cache_idx * sizeof (struct bidi_it));
 658   databuf = xmalloc (alloc);
 659   bidi_cache_total_alloc += alloc;
 660
 661   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 662   memcpy (databuf + sizeof (bidi_cache_idx),
 663           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 664   memcpy (databuf + sizeof (bidi_cache_idx)
 665           + bidi_cache_idx * sizeof (struct bidi_it),
 666           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 667   memcpy (databuf + sizeof (bidi_cache_idx)
 668           + bidi_cache_idx * sizeof (struct bidi_it)
 669           + sizeof (bidi_cache_start_stack),
 670           &bidi_cache_sp, sizeof (bidi_cache_sp));
 671   memcpy (databuf + sizeof (bidi_cache_idx)
 672           + bidi_cache_idx * sizeof (struct bidi_it)
 673           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 674           &bidi_cache_start, sizeof (bidi_cache_start));
 675   memcpy (databuf + sizeof (bidi_cache_idx)
 676           + bidi_cache_idx * sizeof (struct bidi_it)
 677           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 678           + sizeof (bidi_cache_start),
 679           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 680
 681   return databuf;
 682 }
 683
 684 /* Restore the cache state from a copy stashed away by
 685    bidi_shelve_cache, and free the buffer used to stash that copy.
 686    JUST_FREE non-zero means free the buffer, but don't restore the
 687    cache; used when the corresponding iterator is discarded instead of
 688    being restored.  */
 689 void
 690 bidi_unshelve_cache (void *databuf, int just_free)
 691 {
 692   unsigned char *p = databuf;
 693
 694   if (!p)
 695     {
 696       if (!just_free)
 697         {
 698           /* A NULL pointer means an empty cache.  */
 699           bidi_cache_start = 0;
 700           bidi_cache_sp = 0;
 701           bidi_cache_reset ();
 702         }
 703     }
 704   else
 705     {
 706       if (just_free)
 707         {
 708           ptrdiff_t idx;
 709
 710           memcpy (&idx, p, sizeof (bidi_cache_idx));
 711           bidi_cache_total_alloc
 712             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 713         }
 714       else
 715         {
 716           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 717           bidi_cache_ensure_space (bidi_cache_idx);
 718           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 719                   bidi_cache_idx * sizeof (struct bidi_it));
 720           memcpy (bidi_cache_start_stack,
 721                   p + sizeof (bidi_cache_idx)
 722                   + bidi_cache_idx * sizeof (struct bidi_it),
 723                   sizeof (bidi_cache_start_stack));
 724           memcpy (&bidi_cache_sp,
 725                   p + sizeof (bidi_cache_idx)
 726                   + bidi_cache_idx * sizeof (struct bidi_it)
 727                   + sizeof (bidi_cache_start_stack),
 728                   sizeof (bidi_cache_sp));
 729           memcpy (&bidi_cache_start,
 730                   p + sizeof (bidi_cache_idx)
 731                   + bidi_cache_idx * sizeof (struct bidi_it)
 732                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 733                   sizeof (bidi_cache_start));
 734           memcpy (&bidi_cache_last_idx,
 735                   p + sizeof (bidi_cache_idx)
 736                   + bidi_cache_idx * sizeof (struct bidi_it)
 737                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 738                   + sizeof (bidi_cache_start),
 739                   sizeof (bidi_cache_last_idx));
 740           bidi_cache_total_alloc
 741             -= (bidi_shelve_header_size
 742                 + bidi_cache_idx * sizeof (struct bidi_it));
 743         }
 744
 745       xfree (p);
 746     }
 747 }
 748
 749 \f
 750 /***********************************************************************
 751                         Initialization
 752  ***********************************************************************/
 753 static void
 754 bidi_initialize (void)
 755 {
 756   bidi_type_table = uniprop_table (intern ("bidi-class"));
 757   if (NILP (bidi_type_table))
 758     abort ();
 759   staticpro (&bidi_type_table);
 760
 761   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 762   if (NILP (bidi_mirror_table))
 763     abort ();
 764   staticpro (&bidi_mirror_table);
 765
 766   Qparagraph_start = intern ("paragraph-start");
 767   staticpro (&Qparagraph_start);
 768   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 769   if (!STRINGP (paragraph_start_re))
 770     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 771   staticpro (&paragraph_start_re);
 772   Qparagraph_separate = intern ("paragraph-separate");
 773   staticpro (&Qparagraph_separate);
 774   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 775   if (!STRINGP (paragraph_separate_re))
 776     paragraph_separate_re = build_string ("[ \t\f]*$");
 777   staticpro (&paragraph_separate_re);
 778
 779   bidi_cache_sp = 0;
 780   bidi_cache_total_alloc = 0;
 781
 782   bidi_initialized = 1;
 783 }
 784
 785 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 786    end.  */
 787 static inline void
 788 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 789 {
 790   bidi_it->invalid_levels = 0;
 791   bidi_it->invalid_rl_levels = -1;
 792   bidi_it->stack_idx = 0;
 793   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 794 }
 795
 796 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 797 void
 798 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 799               struct bidi_it *bidi_it)
 800 {
 801   if (! bidi_initialized)
 802     bidi_initialize ();
 803   if (charpos >= 0)
 804     bidi_it->charpos = charpos;
 805   if (bytepos >= 0)
 806     bidi_it->bytepos = bytepos;
 807   bidi_it->frame_window_p = frame_window_p;
 808   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 809   bidi_it->first_elt = 1;
 810   bidi_set_paragraph_end (bidi_it);
 811   bidi_it->paragraph_dir = NEUTRAL_DIR;
 812   bidi_it->new_paragraph = 1;
 813   bidi_it->separator_limit = -1;
 814   bidi_it->type = NEUTRAL_B;
 815   bidi_it->type_after_w1 = NEUTRAL_B;
 816   bidi_it->orig_type = NEUTRAL_B;
 817   bidi_it->prev_was_pdf = 0;
 818   bidi_it->prev.type = bidi_it->prev.type_after_w1
 819     = bidi_it->prev.orig_type = UNKNOWN_BT;
 820   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 821     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 822   bidi_it->next_for_neutral.charpos = -1;
 823   bidi_it->next_for_neutral.type
 824     = bidi_it->next_for_neutral.type_after_w1
 825     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 826   bidi_it->prev_for_neutral.charpos = -1;
 827   bidi_it->prev_for_neutral.type
 828     = bidi_it->prev_for_neutral.type_after_w1
 829     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 830   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 831   bidi_it->disp_pos = -1;       /* invalid/unknown */
 832   bidi_it->disp_prop = 0;
 833   /* We can only shrink the cache if we are at the bottom level of its
 834      "stack".  */
 835   if (bidi_cache_start == 0)
 836     bidi_cache_shrink ();
 837   else
 838     bidi_cache_reset ();
 839 }
 840
 841 /* Perform initializations for reordering a new line of bidi text.  */
 842 static void
 843 bidi_line_init (struct bidi_it *bidi_it)
 844 {
 845   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 846   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 847   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 848   bidi_it->invalid_levels = 0;
 849   bidi_it->invalid_rl_levels = -1;
 850   bidi_it->next_en_pos = -1;
 851   bidi_it->next_for_ws.type = UNKNOWN_BT;
 852   bidi_set_sor_type (bidi_it,
 853                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 854                      bidi_it->level_stack[0].level); /* X10 */
 855
 856   bidi_cache_reset ();
 857 }
 858
 859 \f
 860 /***********************************************************************
 861                         Fetching characters
 862  ***********************************************************************/
 863
 864 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 865    are zero-based character positions in S, BEGBYTE is byte position
 866    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 867    string.  */
 868 static inline EMACS_INT
 869 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 870                   const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
 871 {
 872   EMACS_INT pos = beg;
 873   const unsigned char *p = s + begbyte, *start = p;
 874
 875   if (unibyte)
 876     p = s + end;
 877   else
 878     {
 879       if (!CHAR_HEAD_P (*p))
 880         abort ();
 881
 882       while (pos < end)
 883         {
 884           p += BYTES_BY_CHAR_HEAD (*p);
 885           pos++;
 886         }
 887     }
 888
 889   return p - start;
 890 }
 891
 892 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 893    non-NULL, fetch the character from string S; otherwise fetch the
 894    character from the current buffer.  UNIBYTE non-zero means S is a
 895    unibyte string.  */
 896 static inline int
 897 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
 898 {
 899   if (s)
 900     {
 901       if (unibyte)
 902         return s[bytepos];
 903       else
 904         return STRING_CHAR (s + bytepos);
 905     }
 906   else
 907     return FETCH_MULTIBYTE_CHAR (bytepos);
 908 }
 909
 910 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 911    character is covered by a display string, treat the entire run of
 912    covered characters as a single character, either u+2029 or u+FFFC,
 913    and return their combined length in CH_LEN and NCHARS.  DISP_POS
 914    specifies the character position of the next display string, or -1
 915    if not yet computed.  When the next character is at or beyond that
 916    position, the function updates DISP_POS with the position of the
 917    next display string.  DISP_PROP non-zero means that there's really
 918    a display string at DISP_POS, as opposed to when we searched till
 919    DISP_POS without finding one.  If DISP_PROP is 2, it means the
 920    display spec is of the form `(space ...)', which is replaced with
 921    u+2029 to handle it as a paragraph separator.  STRING->s is the C
 922    string to iterate, or NULL if iterating over a buffer or a Lisp
 923    string; in the latter case, STRING->lstring is the Lisp string.  */
 924 static inline int
 925 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 926                  int *disp_prop, struct bidi_string_data *string,
 927                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 928 {
 929   int ch;
 930   EMACS_INT endpos
 931     = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 932   struct text_pos pos;
 933
 934   /* If we got past the last known position of display string, compute
 935      the position of the next one.  That position could be at CHARPOS.  */
 936   if (charpos < endpos && charpos > *disp_pos)
 937     {
 938       SET_TEXT_POS (pos, charpos, bytepos);
 939       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 940                                               disp_prop);
 941     }
 942
 943   /* Fetch the character at BYTEPOS.  */
 944   if (charpos >= endpos)
 945     {
 946       ch = BIDI_EOB;
 947       *ch_len = 1;
 948       *nchars = 1;
 949       *disp_pos = endpos;
 950       *disp_prop = 0;
 951     }
 952   else if (charpos >= *disp_pos && *disp_prop)
 953     {
 954       EMACS_INT disp_end_pos;
 955
 956       /* We don't expect to find ourselves in the middle of a display
 957          property.  Hopefully, it will never be needed.  */
 958       if (charpos > *disp_pos)
 959         abort ();
 960       /* Text covered by `display' properties and overlays with
 961          display properties or display strings is handled as a single
 962          character that represents the entire run of characters
 963          covered by the display property.  */
 964       if (*disp_prop == 2)
 965         {
 966           /* `(space ...)' display specs are handled as paragraph
 967              separators for the purposes of the reordering; see UAX#9
 968              section 3 and clause HL1 in section 4.3 there.  */
 969           ch = 0x2029;
 970         }
 971       else
 972         {
 973           /* All other display specs are handled as the Unicode Object
 974              Replacement Character.  */
 975           ch = 0xFFFC;
 976         }
 977       disp_end_pos = compute_display_string_end (*disp_pos, string);
 978       if (disp_end_pos < 0)
 979         {
 980           /* Somebody removed the display string from the buffer
 981              behind our back.  Recover by processing this buffer
 982              position as if no display property were present there to
 983              begin with.  */
 984           *disp_prop = 0;
 985           goto normal_char;
 986         }
 987       *nchars = disp_end_pos - *disp_pos;
 988       if (*nchars <= 0)
 989         abort ();
 990       if (string->s)
 991         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 992                                     disp_end_pos, string->unibyte);
 993       else if (STRINGP (string->lstring))
 994         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 995                                     bytepos, disp_end_pos, string->unibyte);
 996       else
 997         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 998     }
 999   else
1000     {
1001     normal_char:
1002       if (string->s)
1003         {
1004           int len;
1005
1006           if (!string->unibyte)
1007             {
1008               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1009               *ch_len = len;
1010             }
1011           else
1012             {
1013               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1014               *ch_len = 1;
1015             }
1016         }
1017       else if (STRINGP (string->lstring))
1018         {
1019           int len;
1020
1021           if (!string->unibyte)
1022             {
1023               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1024                                            len);
1025               *ch_len = len;
1026             }
1027           else
1028             {
1029               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1030               *ch_len = 1;
1031             }
1032         }
1033       else
1034         {
1035           ch = FETCH_MULTIBYTE_CHAR (bytepos);
1036           *ch_len = CHAR_BYTES (ch);
1037         }
1038       *nchars = 1;
1039     }
1040
1041   /* If we just entered a run of characters covered by a display
1042      string, compute the position of the next display string.  */
1043   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1044       && *disp_prop)
1045     {
1046       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1047       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1048                                               disp_prop);
1049     }
1050
1051   return ch;
1052 }
1053
1054 \f
1055 /***********************************************************************
1056                         Determining paragraph direction
1057  ***********************************************************************/
1058
1059 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1060    Value is the non-negative length of the paragraph separator
1061    following the buffer position, -1 if position is at the beginning
1062    of a new paragraph, or -2 if position is neither at beginning nor
1063    at end of a paragraph.  */
1064 static EMACS_INT
1065 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1066 {
1067   Lisp_Object sep_re;
1068   Lisp_Object start_re;
1069   EMACS_INT val;
1070
1071   sep_re = paragraph_separate_re;
1072   start_re = paragraph_start_re;
1073
1074   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1075   if (val < 0)
1076     {
1077       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1078         val = -1;
1079       else
1080         val = -2;
1081     }
1082
1083   return val;
1084 }
1085
1086 /* On my 2005-vintage machine, searching back for paragraph start
1087    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
1088    when user types C-p.  The number below limits each call to
1089    bidi_paragraph_init to about 10 ms.  */
1090 #define MAX_PARAGRAPH_SEARCH 7500
1091
1092 /* Find the beginning of this paragraph by looking back in the buffer.
1093    Value is the byte position of the paragraph's beginning, or
1094    BEGV_BYTE if paragraph_start_re is still not found after looking
1095    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1096 static EMACS_INT
1097 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
1098 {
1099   Lisp_Object re = paragraph_start_re;
1100   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1101   EMACS_INT n = 0;
1102
1103   while (pos_byte > BEGV_BYTE
1104          && n++ < MAX_PARAGRAPH_SEARCH
1105          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1106     {
1107       /* FIXME: What if the paragraph beginning is covered by a
1108          display string?  And what if a display string covering some
1109          of the text over which we scan back includes
1110          paragraph_start_re?  */
1111       pos = find_next_newline_no_quit (pos - 1, -1);
1112       pos_byte = CHAR_TO_BYTE (pos);
1113     }
1114   if (n >= MAX_PARAGRAPH_SEARCH)
1115     pos_byte = BEGV_BYTE;
1116   return pos_byte;
1117 }
1118
1119 /* Determine the base direction, a.k.a. base embedding level, of the
1120    paragraph we are about to iterate through.  If DIR is either L2R or
1121    R2L, just use that.  Otherwise, determine the paragraph direction
1122    from the first strong directional character of the paragraph.
1123
1124    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1125    has no strong directional characters and both DIR and
1126    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
1127    in the buffer until a paragraph is found with a strong character,
1128    or until hitting BEGV.  In the latter case, fall back to L2R.  This
1129    flag is used in current-bidi-paragraph-direction.
1130
1131    Note that this function gives the paragraph separator the same
1132    direction as the preceding paragraph, even though Emacs generally
1133    views the separartor as not belonging to any paragraph.  */
1134 void
1135 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
1136 {
1137   EMACS_INT bytepos = bidi_it->bytepos;
1138   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1139   EMACS_INT pstartbyte;
1140   /* Note that begbyte is a byte position, while end is a character
1141      position.  Yes, this is ugly, but we are trying to avoid costly
1142      calls to BYTE_TO_CHAR and its ilk.  */
1143   EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
1144   EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
1145
1146   /* Special case for an empty buffer. */
1147   if (bytepos == begbyte && bidi_it->charpos == end)
1148     dir = L2R;
1149   /* We should never be called at EOB or before BEGV.  */
1150   else if (bidi_it->charpos >= end || bytepos < begbyte)
1151     abort ();
1152
1153   if (dir == L2R)
1154     {
1155       bidi_it->paragraph_dir = L2R;
1156       bidi_it->new_paragraph = 0;
1157     }
1158   else if (dir == R2L)
1159     {
1160       bidi_it->paragraph_dir = R2L;
1161       bidi_it->new_paragraph = 0;
1162     }
1163   else if (dir == NEUTRAL_DIR)  /* P2 */
1164     {
1165       int ch;
1166       EMACS_INT ch_len, nchars;
1167       EMACS_INT pos, disp_pos = -1;
1168       int disp_prop = 0;
1169       bidi_type_t type;
1170       const unsigned char *s;
1171
1172       if (!bidi_initialized)
1173         bidi_initialize ();
1174
1175       /* If we are inside a paragraph separator, we are just waiting
1176          for the separator to be exhausted; use the previous paragraph
1177          direction.  But don't do that if we have been just reseated,
1178          because we need to reinitialize below in that case.  */
1179       if (!bidi_it->first_elt
1180           && bidi_it->charpos < bidi_it->separator_limit)
1181         return;
1182
1183       /* If we are on a newline, get past it to where the next
1184          paragraph might start.  But don't do that at BEGV since then
1185          we are potentially in a new paragraph that doesn't yet
1186          exist.  */
1187       pos = bidi_it->charpos;
1188       s = (STRINGP (bidi_it->string.lstring)
1189            ? SDATA (bidi_it->string.lstring)
1190            : bidi_it->string.s);
1191       if (bytepos > begbyte
1192           && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
1193         {
1194           bytepos++;
1195           pos++;
1196         }
1197
1198       /* We are either at the beginning of a paragraph or in the
1199          middle of it.  Find where this paragraph starts.  */
1200       if (string_p)
1201         {
1202           /* We don't support changes of paragraph direction inside a
1203              string.  It is treated as a single paragraph.  */
1204           pstartbyte = 0;
1205         }
1206       else
1207         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
1208       bidi_it->separator_limit = -1;
1209       bidi_it->new_paragraph = 0;
1210
1211       /* The following loop is run more than once only if NO_DEFAULT_P
1212          is non-zero, and only if we are iterating on a buffer.  */
1213       do {
1214         bytepos = pstartbyte;
1215         if (!string_p)
1216           pos = BYTE_TO_CHAR (bytepos);
1217         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
1218                               &bidi_it->string,
1219                               bidi_it->frame_window_p, &ch_len, &nchars);
1220         type = bidi_get_type (ch, NEUTRAL_DIR);
1221
1222         for (pos += nchars, bytepos += ch_len;
1223              (bidi_get_category (type) != STRONG)
1224                || (bidi_ignore_explicit_marks_for_paragraph_level
1225                    && (type == RLE || type == RLO
1226                        || type == LRE || type == LRO));
1227              type = bidi_get_type (ch, NEUTRAL_DIR))
1228           {
1229             if (pos >= end)
1230               {
1231                 /* Pretend there's a paragraph separator at end of
1232                    buffer/string.  */
1233                 type = NEUTRAL_B;
1234                 break;
1235               }
1236             if (!string_p
1237                 && type == NEUTRAL_B
1238                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
1239               break;
1240             /* Fetch next character and advance to get past it.  */
1241             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
1242                                   &disp_prop, &bidi_it->string,
1243                                   bidi_it->frame_window_p, &ch_len, &nchars);
1244             pos += nchars;
1245             bytepos += ch_len;
1246           }
1247         if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1248             || (!bidi_ignore_explicit_marks_for_paragraph_level
1249                 && (type == RLO || type == RLE)))
1250           bidi_it->paragraph_dir = R2L;
1251         else if (type == STRONG_L
1252                  || (!bidi_ignore_explicit_marks_for_paragraph_level
1253                      && (type == LRO || type == LRE)))
1254           bidi_it->paragraph_dir = L2R;
1255         if (!string_p
1256             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
1257           {
1258             /* If this paragraph is at BEGV, default to L2R.  */
1259             if (pstartbyte == BEGV_BYTE)
1260               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1261             else
1262               {
1263                 EMACS_INT prevpbyte = pstartbyte;
1264                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1265
1266                 /* Find the beginning of the previous paragraph, if any.  */
1267                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1268                   {
1269                     /* FXIME: What if p is covered by a display
1270                        string?  See also a FIXME inside
1271                        bidi_find_paragraph_start.  */
1272                     p--;
1273                     pbyte = CHAR_TO_BYTE (p);
1274                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
1275                   }
1276                 pstartbyte = prevpbyte;
1277               }
1278           }
1279       } while (!string_p
1280                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
1281     }
1282   else
1283     abort ();
1284
1285   /* Contrary to UAX#9 clause P3, we only default the paragraph
1286      direction to L2R if we have no previous usable paragraph
1287      direction.  This is allowed by the HL1 clause.  */
1288   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
1289     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
1290   if (bidi_it->paragraph_dir == R2L)
1291     bidi_it->level_stack[0].level = 1;
1292   else
1293     bidi_it->level_stack[0].level = 0;
1294
1295   bidi_line_init (bidi_it);
1296 }
1297
1298 \f
1299 /***********************************************************************
1300                  Resolving explicit and implicit levels.
1301   The rest of this file constitutes the core of the UBA implementation.
1302  ***********************************************************************/
1303
1304 static inline int
1305 bidi_explicit_dir_char (int ch)
1306 {
1307   bidi_type_t ch_type;
1308
1309   if (!bidi_initialized)
1310     abort ();
1311   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1312   return (ch_type == LRE || ch_type == LRO
1313           || ch_type == RLE || ch_type == RLO
1314           || ch_type == PDF);
1315 }
1316
1317 /* A helper function for bidi_resolve_explicit.  It advances to the
1318    next character in logical order and determines the new embedding
1319    level and directional override, but does not take into account
1320    empty embeddings.  */
1321 static int
1322 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1323 {
1324   int curchar;
1325   bidi_type_t type;
1326   int current_level;
1327   int new_level;
1328   bidi_dir_t override;
1329   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
1330
1331   /* If reseat()'ed, don't advance, so as to start iteration from the
1332      position where we were reseated.  bidi_it->bytepos can be less
1333      than BEGV_BYTE after reseat to BEGV.  */
1334   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
1335       || bidi_it->first_elt)
1336     {
1337       bidi_it->first_elt = 0;
1338       if (string_p)
1339         {
1340           const unsigned char *p
1341             = (STRINGP (bidi_it->string.lstring)
1342                ? SDATA (bidi_it->string.lstring)
1343                : bidi_it->string.s);
1344
1345           if (bidi_it->charpos < 0)
1346             bidi_it->charpos = 0;
1347           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1348                                                bidi_it->string.unibyte);
1349         }
1350       else
1351         {
1352           if (bidi_it->charpos < BEGV)
1353             bidi_it->charpos = BEGV;
1354           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1355         }
1356     }
1357   /* Don't move at end of buffer/string.  */
1358   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
1359     {
1360       /* Advance to the next character, skipping characters covered by
1361          display strings (nchars > 1).  */
1362       if (bidi_it->nchars <= 0)
1363         abort ();
1364       bidi_it->charpos += bidi_it->nchars;
1365       if (bidi_it->ch_len == 0)
1366         abort ();
1367       bidi_it->bytepos += bidi_it->ch_len;
1368     }
1369
1370   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1371   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1372   new_level = current_level;
1373
1374   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
1375     {
1376       curchar = BIDI_EOB;
1377       bidi_it->ch_len = 1;
1378       bidi_it->nchars = 1;
1379       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
1380       bidi_it->disp_prop = 0;
1381     }
1382   else
1383     {
1384       /* Fetch the character at BYTEPOS.  If it is covered by a
1385          display string, treat the entire run of covered characters as
1386          a single character u+FFFC.  */
1387       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
1388                                  &bidi_it->disp_pos, &bidi_it->disp_prop,
1389                                  &bidi_it->string, bidi_it->frame_window_p,
1390                                  &bidi_it->ch_len, &bidi_it->nchars);
1391     }
1392   bidi_it->ch = curchar;
1393
1394   /* Don't apply directional override here, as all the types we handle
1395      below will not be affected by the override anyway, and we need
1396      the original type unaltered.  The override will be applied in
1397      bidi_resolve_weak.  */
1398   type = bidi_get_type (curchar, NEUTRAL_DIR);
1399   bidi_it->orig_type = type;
1400   bidi_check_type (bidi_it->orig_type);
1401
1402   if (type != PDF)
1403     bidi_it->prev_was_pdf = 0;
1404
1405   bidi_it->type_after_w1 = UNKNOWN_BT;
1406
1407   switch (type)
1408     {
1409       case RLE: /* X2 */
1410       case RLO: /* X4 */
1411         bidi_it->type_after_w1 = type;
1412         bidi_check_type (bidi_it->type_after_w1);
1413         type = WEAK_BN; /* X9/Retaining */
1414         if (bidi_it->ignore_bn_limit <= -1)
1415           {
1416             if (current_level <= BIDI_MAXLEVEL - 4)
1417               {
1418                 /* Compute the least odd embedding level greater than
1419                    the current level.  */
1420                 new_level = ((current_level + 1) & ~1) + 1;
1421                 if (bidi_it->type_after_w1 == RLE)
1422                   override = NEUTRAL_DIR;
1423                 else
1424                   override = R2L;
1425                 if (current_level == BIDI_MAXLEVEL - 4)
1426                   bidi_it->invalid_rl_levels = 0;
1427                 bidi_push_embedding_level (bidi_it, new_level, override);
1428               }
1429             else
1430               {
1431                 bidi_it->invalid_levels++;
1432                 /* See the commentary about invalid_rl_levels below.  */
1433                 if (bidi_it->invalid_rl_levels < 0)
1434                   bidi_it->invalid_rl_levels = 0;
1435                 bidi_it->invalid_rl_levels++;
1436               }
1437           }
1438         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1439                  || bidi_it->next_en_pos > bidi_it->charpos)
1440           type = WEAK_EN;
1441         break;
1442       case LRE: /* X3 */
1443       case LRO: /* X5 */
1444         bidi_it->type_after_w1 = type;
1445         bidi_check_type (bidi_it->type_after_w1);
1446         type = WEAK_BN; /* X9/Retaining */
1447         if (bidi_it->ignore_bn_limit <= -1)
1448           {
1449             if (current_level <= BIDI_MAXLEVEL - 5)
1450               {
1451                 /* Compute the least even embedding level greater than
1452                    the current level.  */
1453                 new_level = ((current_level + 2) & ~1);
1454                 if (bidi_it->type_after_w1 == LRE)
1455                   override = NEUTRAL_DIR;
1456                 else
1457                   override = L2R;
1458                 bidi_push_embedding_level (bidi_it, new_level, override);
1459               }
1460             else
1461               {
1462                 bidi_it->invalid_levels++;
1463                 /* invalid_rl_levels counts invalid levels encountered
1464                    while the embedding level was already too high for
1465                    LRE/LRO, but not for RLE/RLO.  That is because
1466                    there may be exactly one PDF which we should not
1467                    ignore even though invalid_levels is non-zero.
1468                    invalid_rl_levels helps to know what PDF is
1469                    that.  */
1470                 if (bidi_it->invalid_rl_levels >= 0)
1471                   bidi_it->invalid_rl_levels++;
1472               }
1473           }
1474         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1475                  || bidi_it->next_en_pos > bidi_it->charpos)
1476           type = WEAK_EN;
1477         break;
1478       case PDF: /* X7 */
1479         bidi_it->type_after_w1 = type;
1480         bidi_check_type (bidi_it->type_after_w1);
1481         type = WEAK_BN; /* X9/Retaining */
1482         if (bidi_it->ignore_bn_limit <= -1)
1483           {
1484             if (!bidi_it->invalid_rl_levels)
1485               {
1486                 new_level = bidi_pop_embedding_level (bidi_it);
1487                 bidi_it->invalid_rl_levels = -1;
1488                 if (bidi_it->invalid_levels)
1489                   bidi_it->invalid_levels--;
1490                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1491               }
1492             if (!bidi_it->invalid_levels)
1493               new_level = bidi_pop_embedding_level (bidi_it);
1494             else
1495               {
1496                 bidi_it->invalid_levels--;
1497                 bidi_it->invalid_rl_levels--;
1498               }
1499           }
1500         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1501                  || bidi_it->next_en_pos > bidi_it->charpos)
1502           type = WEAK_EN;
1503         break;
1504       default:
1505         /* Nothing.  */
1506         break;
1507     }
1508
1509   bidi_it->type = type;
1510   bidi_check_type (bidi_it->type);
1511
1512   return new_level;
1513 }
1514
1515 /* Given an iterator state in BIDI_IT, advance one character position
1516    in the buffer/string to the next character (in the logical order),
1517    resolve any explicit embeddings and directional overrides, and
1518    return the embedding level of the character after resolving
1519    explicit directives and ignoring empty embeddings.  */
1520 static int
1521 bidi_resolve_explicit (struct bidi_it *bidi_it)
1522 {
1523   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1524   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1525   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1526   const unsigned char *s
1527     = (STRINGP (bidi_it->string.lstring)
1528        ? SDATA (bidi_it->string.lstring)
1529        : bidi_it->string.s);
1530
1531   if (prev_level < new_level
1532       && bidi_it->type == WEAK_BN
1533       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1534       && bidi_it->charpos < eob         /* not already at EOB */
1535       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1536                                                    + bidi_it->ch_len, s,
1537                                                    bidi_it->string.unibyte)))
1538     {
1539       /* Avoid pushing and popping embedding levels if the level run
1540          is empty, as this breaks level runs where it shouldn't.
1541          UAX#9 removes all the explicit embedding and override codes,
1542          so empty embeddings disappear without a trace.  We need to
1543          behave as if we did the same.  */
1544       struct bidi_it saved_it;
1545       int level = prev_level;
1546
1547       bidi_copy_it (&saved_it, bidi_it);
1548
1549       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1550                                                        + bidi_it->ch_len, s,
1551                                                        bidi_it->string.unibyte)))
1552         {
1553           /* This advances to the next character, skipping any
1554              characters covered by display strings.  */
1555           level = bidi_resolve_explicit_1 (bidi_it);
1556           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1557              a pointer to its data is no longer valid.  */
1558           if (STRINGP (bidi_it->string.lstring))
1559             s = SDATA (bidi_it->string.lstring);
1560         }
1561
1562       if (bidi_it->nchars <= 0)
1563         abort ();
1564       if (level == prev_level)  /* empty embedding */
1565         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1566       else                      /* this embedding is non-empty */
1567         saved_it.ignore_bn_limit = -2;
1568
1569       bidi_copy_it (bidi_it, &saved_it);
1570       if (bidi_it->ignore_bn_limit > -1)
1571         {
1572           /* We pushed a level, but we shouldn't have.  Undo that. */
1573           if (!bidi_it->invalid_rl_levels)
1574             {
1575               new_level = bidi_pop_embedding_level (bidi_it);
1576               bidi_it->invalid_rl_levels = -1;
1577               if (bidi_it->invalid_levels)
1578                 bidi_it->invalid_levels--;
1579             }
1580           if (!bidi_it->invalid_levels)
1581             new_level = bidi_pop_embedding_level (bidi_it);
1582           else
1583             {
1584               bidi_it->invalid_levels--;
1585               bidi_it->invalid_rl_levels--;
1586             }
1587         }
1588     }
1589
1590   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1591     {
1592       bidi_set_paragraph_end (bidi_it);
1593       /* This is needed by bidi_resolve_weak below, and in L1.  */
1594       bidi_it->type_after_w1 = bidi_it->type;
1595       bidi_check_type (bidi_it->type_after_w1);
1596     }
1597
1598   return new_level;
1599 }
1600
1601 /* Advance in the buffer/string, resolve weak types and return the
1602    type of the next character after weak type resolution.

        The advance itself is done by bidi_resolve_explicit; this
        function then applies the weak-type clauses marked W1..W7 in the
        body (or the X6 override) to the character BIDI_IT now
        describes.  The result is stored in BIDI_IT->type and returned.
        Side effects visible here: BIDI_IT->type_after_w1 is recorded
        for later use by L1, and BIDI_IT->next_en_pos may be set to
        memoize a W5 lookahead.  Aborts if the type left by
        bidi_resolve_explicit is UNKNOWN_BT, LRE, LRO, RLE, RLO or
        PDF.  */
1603 static bidi_type_t
1604 bidi_resolve_weak (struct bidi_it *bidi_it)
1605 {
1606   bidi_type_t type;
1607   bidi_dir_t override;
       /* Level on top of the stack before advancing; compared with
          NEW_LEVEL below to detect the start of a new level run.  */
1608   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1609   int new_level  = bidi_resolve_explicit (bidi_it);
1610   int next_char;
1611   bidi_type_t type_of_next;
1612   struct bidi_it saved_it;
       /* Position at which the one-character lookahead below must stop:
          string.schars when iterating a string, ZV otherwise.  */
1613   EMACS_INT eob
1614     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1615        ? bidi_it->string.schars : ZV);
1616
1617   type = bidi_it->type;
1618   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1619
       /* These types are rejected here; anything else is handled
          below.  */
1620   if (type == UNKNOWN_BT
1621       || type == LRE
1622       || type == LRO
1623       || type == RLE
1624       || type == RLO
1625       || type == PDF)
1626     abort ();
1627
1628   if (new_level != prev_level
1629       || bidi_it->type == NEUTRAL_B)
1630     {
1631       /* We've got a new embedding level run, compute the directional
1632          type of sor and initialize per-run variables (UAX#9, clause
1633          X10).  */
1634       bidi_set_sor_type (bidi_it, prev_level, new_level);
1635     }
1636   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1637            || type == WEAK_BN || type == STRONG_AL)
1638     bidi_it->type_after_w1 = type;      /* needed in L1 */
1639   bidi_check_type (bidi_it->type_after_w1);
1640
1641   /* Level and directional override status are already recorded in
1642      bidi_it, and do not need any change; see X6.  */
1643   if (override == R2L)          /* X6 */
1644     type = STRONG_R;
1645   else if (override == L2R)
1646     type = STRONG_L;
1647   else
1648     {
1649       if (type == WEAK_NSM)     /* W1 */
1650         {
1651           /* Note that we don't need to consider the case where the
1652              prev character has its type overridden by an RLO or LRO,
1653              because then either the type of this NSM would have been
1654              also overridden, or the previous character is outside the
1655              current level run, and thus not relevant to this NSM.
1656              This is why NSM gets the type_after_w1 of the previous
1657              character.  */
1658           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1659               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1660               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1661             type = bidi_it->prev.type_after_w1;
1662           else if (bidi_it->sor == R2L)
1663             type = STRONG_R;
1664           else if (bidi_it->sor == L2R)
1665             type = STRONG_L;
1666           else /* shouldn't happen! */
1667             abort ();
1668         }
           /* Note: this is a separate `if', not an `else', so an NSM
              that took on a new type in W1 above is then tested by
              W2..W5 with that new type.  */
1669       if (type == WEAK_EN       /* W2 */
1670           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1671         type = WEAK_AN;
1672       else if (type == STRONG_AL) /* W3 */
1673         type = STRONG_R;
1674       else if ((type == WEAK_ES /* W4 */
1675                 && bidi_it->prev.type_after_w1 == WEAK_EN
1676                 && bidi_it->prev.orig_type == WEAK_EN)
1677                || (type == WEAK_CS
1678                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1679                         && bidi_it->prev.orig_type == WEAK_EN)
1680                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1681         {
1682           const unsigned char *s
1683             = (STRINGP (bidi_it->string.lstring)
1684                ? SDATA (bidi_it->string.lstring)
1685                : bidi_it->string.s);
1686
               /* Peek at the character right after this one, or use
                  BIDI_EOB if this one is the last before EOB.  */
1687           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1688                        ? BIDI_EOB
1689                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1690                                            s, bidi_it->string.unibyte));
1691           type_of_next = bidi_get_type (next_char, override);
1692
1693           if (type_of_next == WEAK_BN
1694               || bidi_explicit_dir_char (next_char))
1695             {
                   /* Step the live iterator over any run of BN
                      characters at this level, note the type where the
                      run ends, then restore the iterator from the saved
                      copy.  */
1696               bidi_copy_it (&saved_it, bidi_it);
1697               while (bidi_resolve_explicit (bidi_it) == new_level
1698                      && bidi_it->type == WEAK_BN)
1699                 ;
1700               type_of_next = bidi_it->type;
1701               bidi_copy_it (bidi_it, &saved_it);
1702             }
1703
1704           /* If the next character is EN, but the last strong-type
1705              character is AL, that next EN will be changed to AN when
1706              we process it in W2 above.  So in that case, this ES
1707              should not be changed into EN.  */
1708           if (type == WEAK_ES
1709               && type_of_next == WEAK_EN
1710               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1711             type = WEAK_EN;
1712           else if (type == WEAK_CS)
1713             {
1714               if (bidi_it->prev.type_after_w1 == WEAK_AN
1715                   && (type_of_next == WEAK_AN
1716                       /* If the next character is EN, but the last
1717                          strong-type character is AL, EN will be later
1718                          changed to AN when we process it in W2 above.
1719                          So in that case, this CS sits between two ANs
1720                          and should be changed into AN.  */
1721                       || (type_of_next == WEAK_EN
1722                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1723                 type = WEAK_AN;
1724               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1725                        && type_of_next == WEAK_EN
1726                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1727                 type = WEAK_EN;
1728             }
1729         }
1730       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1731                || type == WEAK_BN)      /* W5/Retaining */
1732         {
               /* next_en_pos beyond charpos means an earlier lookahead
                  (see below) already found an EN ahead of us.  */
1733           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1734               || bidi_it->next_en_pos > bidi_it->charpos)
1735             type = WEAK_EN;
1736           else                  /* W5: ET/BN with EN after it.  */
1737             {
1738               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1739               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1740                                         ? SDATA (bidi_it->string.lstring)
1741                                         : bidi_it->string.s);
1742
1743               if (bidi_it->nchars <= 0)
1744                 abort ();
1745               next_char
1746                 = (bidi_it->charpos + bidi_it->nchars >= eob
1747                    ? BIDI_EOB
1748                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1749                                        bidi_it->string.unibyte));
1750               type_of_next = bidi_get_type (next_char, override);
1751
1752               if (type_of_next == WEAK_ET
1753                   || type_of_next == WEAK_BN
1754                   || bidi_explicit_dir_char (next_char))
1755                 {
                       /* Same save/advance/restore lookahead as in W4,
                          but here ETs are skipped as well, and the
                          position where the run ends is kept in
                          EN_POS.  */
1756                   bidi_copy_it (&saved_it, bidi_it);
1757                   while (bidi_resolve_explicit (bidi_it) == new_level
1758                          && (bidi_it->type == WEAK_BN
1759                              || bidi_it->type == WEAK_ET))
1760                     ;
1761                   type_of_next = bidi_it->type;
1762                   en_pos = bidi_it->charpos;
1763                   bidi_copy_it (bidi_it, &saved_it);
1764                 }
1765               if (type_of_next == WEAK_EN)
1766                 {
1767                   /* If the last strong character is AL, the EN we've
1768                      found will become AN when we get to it (W2). */
1769                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1770                     {
1771                       type = WEAK_EN;
1772                       /* Remember this EN position, to speed up processing
1773                          of the next ETs.  */
1774                       bidi_it->next_en_pos = en_pos;
1775                     }
1776                   else if (type == WEAK_BN)
1777                     type = NEUTRAL_ON; /* W6/Retaining */
1778                 }
1779             }
1780         }
1781     }
1782
       /* Separators and terminators that survived W4/W5 unchanged become
          other neutrals, as does a BN that directly follows one.  */
1783   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1784       || (type == WEAK_BN
1785           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1786               || bidi_it->prev.type_after_w1 == WEAK_ES
1787               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1788     type = NEUTRAL_ON;
1789
1790   /* Store the type we've got so far, before we clobber it with strong
1791      types in W7 and while resolving neutral types.  But leave alone
1792      the original types that were recorded above, because we will need
1793      them for the L1 clause.  */
1794   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1795     bidi_it->type_after_w1 = type;
1796   bidi_check_type (bidi_it->type_after_w1);
1797
1798   if (type == WEAK_EN)  /* W7 */
1799     {
           /* EN becomes L when the last strong type was L, or when no
              strong type has been recorded and sor is L2R.  */
1800       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1801           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1802         type = STRONG_L;
1803     }
1804
1805   bidi_it->type = type;
1806   bidi_check_type (bidi_it->type);
1807   return type;
1808 }
1809
1810 /* Resolve the type of a neutral character according to the type of
1811    surrounding strong text and the current embedding level.

        PREV_TYPE and NEXT_TYPE are the types on either side of the
        neutral; LEV is the embedding level.  WEAK_EN and WEAK_AN on
        either side count as STRONG_R.  If both sides then agree, that
        common type is returned (N1); otherwise the result follows the
        parity of LEV: STRONG_L for an even level, STRONG_R for an odd
        one (N2).  */
1812 static inline bidi_type_t
1813 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1814 {
1815   /* N1: European and Arabic numbers are treated as though they were R.  */
1816   if (next_type == WEAK_EN || next_type == WEAK_AN)
1817     next_type = STRONG_R;
1818   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1819     prev_type = STRONG_R;
1820
1821   if (next_type == prev_type)   /* N1 */
1822     return next_type;
1823   else if ((lev & 1) == 0)      /* N2 */
1824     return STRONG_L;
1825   else
1826     return STRONG_R;
1827 }
1828
1829 static bidi_type_t
1830 bidi_resolve_neutral (struct bidi_it *bidi_it)
1831 {
       /* Advance BIDI_IT by one character via bidi_resolve_weak and, if
          the resulting type is neutral (or a BN that did not change the
          embedding level), resolve it with bidi_resolve_neutral_1.
          Returns the resulting type.

          If BIDI_IT->next_for_neutral is already known it is used
          directly.  Otherwise the text is scanned forward, caching each
          iterator state, until a paragraph separator, a non-neutral
          non-BN type, or a change of embedding level is found; BIDI_IT
          is then restored to the starting character with
          next_for_neutral filled in.

          Aborts if bidi_resolve_weak yields a type outside the set
          tested below, or if the forward scan is needed while scan_dir
          is -1.  */
1832   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1833   bidi_type_t type = bidi_resolve_weak (bidi_it);
1834   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1835
1836   if (!(type == STRONG_R
1837         || type == STRONG_L
1838         || type == WEAK_BN
1839         || type == WEAK_EN
1840         || type == WEAK_AN
1841         || type == NEUTRAL_B
1842         || type == NEUTRAL_S
1843         || type == NEUTRAL_WS
1844         || type == NEUTRAL_ON))
1845     abort ();
1846
1847   if (bidi_get_category (type) == NEUTRAL
1848       || (type == WEAK_BN && prev_level == current_level))
1849     {
1850       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1851         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1852                                        bidi_it->next_for_neutral.type,
1853                                        current_level);
1854       else
1855         {
1856           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1857              the assumption of batch-style processing; see clauses W4,
1858              W5, and especially N1, which require to look far forward
1859              (as well as back) in the buffer/string.  May the fleas of
1860              a thousand camels infest the armpits of those who design
1861              supposedly general-purpose algorithms by looking at their
1862              own implementations, and fail to consider other possible
1863              implementations!  */
1864           struct bidi_it saved_it;
1865           bidi_type_t next_type;
1866
1867           if (bidi_it->scan_dir == -1)
1868             abort ();
1869
1870           bidi_copy_it (&saved_it, bidi_it);
1871           /* Scan the text forward until we find the first non-neutral
1872              character, and then use that to resolve the neutral we
1873              are dealing with now.  We also cache the scanned iterator
1874              states, to salvage some of the effort later.  */
1875           bidi_cache_iterator_state (bidi_it, 0);
1876           do {
1877             /* Record the info about the previous character, so that
1878                it will be cached below with this state.  */
1879             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1880                 && bidi_it->type != WEAK_BN)
1881               bidi_remember_char (&bidi_it->prev, bidi_it);
1882             type = bidi_resolve_weak (bidi_it);
1883             /* Paragraph separators have their levels fully resolved
1884                at this point, so cache them as resolved.  */
1885             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1886             /* FIXME: implement L1 here, by testing for a newline and
1887                resetting the level for any sequence of whitespace
1888                characters adjacent to it.  */
1889           } while (!(type == NEUTRAL_B
1890                      || (type != WEAK_BN
1891                          && bidi_get_category (type) != NEUTRAL)
1892                      /* This is all per level run, so stop when we
1893                         reach the end of this level run.  */
1894                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1895                          != current_level)));
1896
               /* Record where the scan stopped in the saved copy, which
                  is what BIDI_IT is restored from below.  */
1897           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1898
1899           switch (type)
1900             {
1901               case STRONG_L:
1902               case STRONG_R:
1903               case STRONG_AL:
1904                 next_type = type;
1905                 break;
1906               case WEAK_EN:
1907               case WEAK_AN:
1908                 /* N1: ``European and Arabic numbers are treated as
1909                    though they were R.''  */
1910                 next_type = STRONG_R;
1911                 saved_it.next_for_neutral.type = STRONG_R;
1912                 break;
1913               case WEAK_BN:
                     /* The loop above exits on a BN only when the
                        embedding level changed.  */
1914                 if (!bidi_explicit_dir_char (bidi_it->ch))
1915                   abort ();             /* can't happen: BNs are skipped */
1916                 /* FALLTHROUGH */
1917               case NEUTRAL_B:
1918                 /* Marched all the way to the end of this level run.
1919                    We need to use the eor type, whose information is
1920                    stored by bidi_set_sor_type in the prev_for_neutral
1921                    member.  */
1922                 if (saved_it.type != WEAK_BN
1923                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1924                   {
1925                     next_type = bidi_it->prev_for_neutral.type;
1926                     saved_it.next_for_neutral.type = next_type;
1927                     bidi_check_type (next_type);
1928                   }
1929                 else
1930                   {
1931                     /* This is a BN which does not adjoin neutrals.
1932                        Leave its type alone.  */
1933                     bidi_copy_it (bidi_it, &saved_it);
1934                     return bidi_it->type;
1935                   }
1936                 break;
1937               default:
1938                 abort ();
1939             }
               /* Resolve the starting character's type, store it in the
                  saved copy, and restore BIDI_IT from that copy.  */
1940           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1941                                          next_type, current_level);
1942           saved_it.type = type;
1943           bidi_check_type (type);
1944           bidi_copy_it (bidi_it, &saved_it);
1945         }
1946     }
1947   return type;
1948 }
1949
1950 /* Given an iterator state in BIDI_IT, advance one character position
1951    in the buffer/string to the next character (in the logical order),
1952    resolve the bidi type of that next character, and return that
1953    type.  The resolution itself is delegated to bidi_resolve_neutral.
        Aborts unless BIDI_IT->scan_dir is 1.  */
1954 static bidi_type_t
1955 bidi_type_of_next_char (struct bidi_it *bidi_it)
1956 {
1957   bidi_type_t type;
1958
1959   /* This should always be called during a forward scan.  */
1960   if (bidi_it->scan_dir != 1)
1961     abort ();
1962
1963   /* Reset the limit until which to ignore BNs if we step out of the
1964      area where we found only empty levels.  A limit above -1 is a
          character position and expires once charpos reaches it; -2 is
          the marker stored for a non-empty embedding and is dropped as
          soon as the current character is not an explicit formatting
          character.  */
1965   if ((bidi_it->ignore_bn_limit > -1
1966        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1967       || (bidi_it->ignore_bn_limit == -2
1968           && !bidi_explicit_dir_char (bidi_it->ch)))
1969     bidi_it->ignore_bn_limit = -1;
1970
1971   type = bidi_resolve_neutral (bidi_it);
1972
1973   return type;
1974 }
1975
1976 /* Given an iterator state BIDI_IT, advance one character position in
1977    the buffer/string to the next character (in the current scan
1978    direction), resolve the embedding and implicit levels of that next
1979    character, and return the resulting level.

        The level is also stored in BIDI_IT->resolved_level, except on
        the early returns (end of text on a forward scan, a fully
        resolved cached state, or a NEUTRAL_B type), which return the
        resolved_level currently in BIDI_IT.  A backward scan
        (scan_dir == -1) must be satisfied from the cache; otherwise
        this aborts.  */
1980 static int
1981 bidi_level_of_next_char (struct bidi_it *bidi_it)
1982 {
1983   bidi_type_t type;
1984   int level, prev_level = -1;
1985   struct bidi_saved_info next_for_neutral;
       /* -2 is lower than BOB - 1 for both buffers and strings, so an
          unset position never reaches bidi_cache_find below.  */
1986   EMACS_INT next_char_pos = -2;
1987
1988   if (bidi_it->scan_dir == 1)
1989     {
1990       EMACS_INT eob
1991         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1992            ? bidi_it->string.schars : ZV);
1993
1994       /* There's no sense in trying to advance if we hit end of text.  */
1995       if (bidi_it->charpos >= eob)
1996         return bidi_it->resolved_level;
1997
1998       /* Record the info about the previous character.  */
1999       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2000           && bidi_it->type != WEAK_BN)
2001         bidi_remember_char (&bidi_it->prev, bidi_it);
2002       if (bidi_it->type_after_w1 == STRONG_R
2003           || bidi_it->type_after_w1 == STRONG_L
2004           || bidi_it->type_after_w1 == STRONG_AL)
2005         bidi_remember_char (&bidi_it->last_strong, bidi_it);
2006       /* FIXME: it sounds like we don't need both prev and
2007          prev_for_neutral members, but I'm leaving them both for now.  */
2008       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2009           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2010         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2011
2012       /* If we overstepped the characters used for resolving neutrals
2013          and whitespace, invalidate their info in the iterator.  */
2014       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2015         bidi_it->next_for_neutral.type = UNKNOWN_BT;
2016       if (bidi_it->next_en_pos >= 0
2017           && bidi_it->charpos >= bidi_it->next_en_pos)
2018         bidi_it->next_en_pos = -1;
2019       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2020           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2021         bidi_it->next_for_ws.type = UNKNOWN_BT;
2022
2023       /* This must be taken before we fill the iterator with the info
2024          about the next char.  If we scan backwards, the iterator
2025          state must be already cached, so there's no need to know the
2026          embedding level of the previous character, since we will be
2027          returning to our caller shortly.  */
2028       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2029     }
       /* Keep a copy; it is put back below if the cached state turns
          out to have no next_for_neutral info.  */
2030   next_for_neutral = bidi_it->next_for_neutral;
2031
2032   /* Perhaps the character we want is already cached.  If it is, the
2033      call to bidi_cache_find below will return a type other than
2034      UNKNOWN_BT.  */
2035   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2036     {
           /* First valid position: 0 for strings, 1 for buffers.  */
2037       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2038                  ? 0 : 1);
2039       if (bidi_it->scan_dir > 0)
2040         {
2041           if (bidi_it->nchars <= 0)
2042             abort ();
2043           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2044         }
2045       else if (bidi_it->charpos >= bob)
2046         /* Implementation note: we allow next_char_pos to be as low as
2047            0 for buffers or -1 for strings, and that is okay because
2048            that's the "position" of the sentinel iterator state we
2049            cached at the beginning of the iteration.  */
2050         next_char_pos = bidi_it->charpos - 1;
2051       if (next_char_pos >= bob - 1)
2052         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2053       else
2054         type = UNKNOWN_BT;
2055     }
2056   else
2057     type = UNKNOWN_BT;
2058   if (type != UNKNOWN_BT)
2059     {
2060       /* Don't lose the information for resolving neutrals!  The
2061          cached states could have been cached before their
2062          next_for_neutral member was computed.  If we are on our way
2063          forward, we can simply take the info from the previous
2064          state.  */
2065       if (bidi_it->scan_dir == 1
2066           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2067         bidi_it->next_for_neutral = next_for_neutral;
2068
2069       /* If resolved_level is -1, it means this state was cached
2070          before it was completely resolved, so we cannot return
2071          it.  */
2072       if (bidi_it->resolved_level != -1)
2073         return bidi_it->resolved_level;
2074     }
2075   if (bidi_it->scan_dir == -1)
2076     /* If we are going backwards, the iterator state is already cached
2077        from previous scans, and should be fully resolved.  */
2078     abort ();
2079
       /* Cache miss: resolve the type of the next character the hard
          way.  */
2080   if (type == UNKNOWN_BT)
2081     type = bidi_type_of_next_char (bidi_it);
2082
2083   if (type == NEUTRAL_B)
2084     return bidi_it->resolved_level;
2085
2086   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2087   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2088       || (type == WEAK_BN && prev_level == level))
2089     {
2090       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2091         abort ();
2092
2093       /* If the cached state shows a neutral character, it was not
2094          resolved by bidi_resolve_neutral, so do it now.  */
2095       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2096                                      bidi_it->next_for_neutral.type,
2097                                      level);
2098     }
2099
       /* By now only these types may remain.  */
2100   if (!(type == STRONG_R
2101         || type == STRONG_L
2102         || type == WEAK_BN
2103         || type == WEAK_EN
2104         || type == WEAK_AN))
2105     abort ();
2106   bidi_it->type = type;
2107   bidi_check_type (bidi_it->type);
2108
2109   /* For L1 below, we need to know, for each WS character, whether
2110      it belongs to a sequence of WS characters preceding a newline
2111      or a TAB or a paragraph separator.  */
2112   if (bidi_it->orig_type == NEUTRAL_WS
2113       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2114     {
           /* The lookahead below works on local copies of the position
              fields; of BIDI_IT itself only next_for_ws is written.  */
2115       int ch;
2116       EMACS_INT clen = bidi_it->ch_len;
2117       EMACS_INT bpos = bidi_it->bytepos;
2118       EMACS_INT cpos = bidi_it->charpos;
2119       EMACS_INT disp_pos = bidi_it->disp_pos;
2120       EMACS_INT nc = bidi_it->nchars;
2121       struct bidi_string_data bs = bidi_it->string;
2122       bidi_type_t chtype;
2123       int fwp = bidi_it->frame_window_p;
2124       int dpp = bidi_it->disp_prop;
2125
2126       if (bidi_it->nchars <= 0)
2127         abort ();
           /* Fetch characters until one is found that is neither
              whitespace, nor BN, nor an explicit formatting character;
              a newline or BIDI_EOB counts as NEUTRAL_B.  */
2128       do {
2129         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2130                               fwp, &clen, &nc);
2131         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
2132           chtype = NEUTRAL_B;
2133         else
2134           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2135       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2136                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2137       bidi_it->next_for_ws.type = chtype;
2138       bidi_check_type (bidi_it->next_for_ws.type);
2139       bidi_it->next_for_ws.charpos = cpos;
2140       bidi_it->next_for_ws.bytepos = bpos;
2141     }
2142
2143   /* Resolve implicit levels, with a twist: PDFs get the embedding
2144      level of the embedding they terminate.  See below for the
2145      reason.  */
2146   if (bidi_it->orig_type == PDF
2147       /* Don't do this if this formatting code didn't change the
2148          embedding level due to invalid or empty embeddings.  */
2149       && prev_level != level)
2150     {
2151       /* Don't look in UAX#9 for the reason for this: it's our own
2152          private quirk.  The reason is that we want the formatting
2153          codes to be delivered so that they bracket the text of their
2154          embedding.  For example, given the text
2155
2156              {RLO}teST{PDF}
2157
2158          we want it to be displayed as
2159
2160              {PDF}STet{RLO}
2161
2162          not as
2163
2164              STet{RLO}{PDF}
2165
2166          which will result because we bump up the embedding level as
2167          soon as we see the RLO and pop it as soon as we see the PDF,
2168          so RLO itself has the same embedding level as "teST", and
2169          thus would be normally delivered last, just before the PDF.
2170          The switch below fiddles with the level of PDF so that this
2171          ugly side effect does not happen.
2172
2173          (This is, of course, only important if the formatting codes
2174          are actually displayed, but Emacs does need to display them
2175          if the user wants to.)  */
2176       level = prev_level;
2177     }
       /* L1: separators, newline/EOB, and whitespace that runs up to a
          paragraph or segment separator take the level stored at the
          bottom of the level stack.  */
2178   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2179            || bidi_it->orig_type == NEUTRAL_S
2180            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2181            /* || bidi_it->ch == LINESEP_CHAR */
2182            || (bidi_it->orig_type == NEUTRAL_WS
2183                && (bidi_it->next_for_ws.type == NEUTRAL_B
2184                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2185     level = bidi_it->level_stack[0].level;
2186   else if ((level & 1) == 0) /* I1 */
2187     {
           /* Even level: R goes up one level, numbers go up two.  */
2188       if (type == STRONG_R)
2189         level++;
2190       else if (type == WEAK_EN || type == WEAK_AN)
2191         level += 2;
2192     }
2193   else                  /* I2 */
2194     {
           /* Odd level: L and numbers go up one level.  */
2195       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2196         level++;
2197     }
2198
2199   bidi_it->resolved_level = level;
2200   return level;
2201 }
2202
2203 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2204    non-zero, we are at the end of a level, and we need to prepare to
2205    resume the scan of the lower level.
2206
2207    If this level's other edge is cached, we simply jump to it, filling
2208    the iterator structure with the iterator state on the other edge.
2209    Otherwise, we walk the buffer or string until we come back to the
2210    same level as LEVEL.
2211
2212    Note: we are not talking here about a ``level run'' in the UAX#9
2213    sense of the term, but rather about a ``level'' which includes
2214    all the levels higher than it.  In other words, given the levels
2215    like this:
2216
2217          11111112222222333333334443343222222111111112223322111
2218                 A      B                    C
2219
2220    and assuming we are at point A scanning left to right, this
2221    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2222    at point B.

        On return BIDI_IT holds the state at the other edge.  Aborts if
        END_FLAG is non-zero and the edge is not found in the cache.  */
2223 static void
2224 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2225 {
       /* When END_FLAG is set, the cache is searched against the
          current scan direction.  */
2226   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2227   ptrdiff_t idx;
2228
2229   /* Try the cache first.  An index at or above bidi_cache_start is
          treated as a hit.  */
2230   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2231       >= bidi_cache_start)
2232     bidi_cache_fetch_state (idx, bidi_it);
2233   else
2234     {
2235       int new_level;
2236
2237       if (end_flag)
2238         abort (); /* if we are at end of level, its edges must be cached */
2239
           /* Cache the current state, then keep advancing and caching
              each state as resolved until the level drops below
              LEVEL.  */
2240       bidi_cache_iterator_state (bidi_it, 1);
2241       do {
2242         new_level = bidi_level_of_next_char (bidi_it);
2243         bidi_cache_iterator_state (bidi_it, 1);
2244       } while (new_level >= level);
2245     }
2246 }
2247
2248 void
2249 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2250 {
2251   int old_level, new_level, next_level;
2252   struct bidi_it sentinel;
2253   struct gcpro gcpro1;
2254
2255   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2256     abort ();
2257
2258   if (bidi_it->scan_dir == 0)
2259     {
2260       bidi_it->scan_dir = 1;    /* default to logical order */
2261     }
2262
2263   /* The code below can call eval, and thus cause GC.  If we are
2264      iterating a Lisp string, make sure it won't be GCed.  */
2265   if (STRINGP (bidi_it->string.lstring))
2266     GCPRO1 (bidi_it->string.lstring);
2267
2268   /* If we just passed a newline, initialize for the next line.  */
2269   if (!bidi_it->first_elt
2270       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2271     bidi_line_init (bidi_it);
2272
2273   /* Prepare the sentinel iterator state, and cache it.  When we bump
2274      into it, scanning backwards, we'll know that the last non-base
2275      level is exhausted.  */
2276   if (bidi_cache_idx == bidi_cache_start)
2277     {
2278       bidi_copy_it (&sentinel, bidi_it);
2279       if (bidi_it->first_elt)
2280         {
2281           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2282           sentinel.bytepos--;
2283           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2284           sentinel.ch_len = 1;
2285           sentinel.nchars = 1;
2286         }
2287       bidi_cache_iterator_state (&sentinel, 1);
2288     }
2289
2290   old_level = bidi_it->resolved_level;
2291   new_level = bidi_level_of_next_char (bidi_it);
2292
2293   /* Reordering of resolved levels (clause L2) is implemented by
2294      jumping to the other edge of the level and flipping direction of
2295      scanning the text whenever we find a level change.  */
2296   if (new_level != old_level)
2297     {
2298       int ascending = new_level > old_level;
2299       int level_to_search = ascending ? old_level + 1 : old_level;
2300       int incr = ascending ? 1 : -1;
2301       int expected_next_level = old_level + incr;
2302
2303       /* Jump (or walk) to the other edge of this level.  */
2304       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2305       /* Switch scan direction and peek at the next character in the
2306          new direction.  */
2307       bidi_it->scan_dir = -bidi_it->scan_dir;
2308
2309       /* The following loop handles the case where the resolved level
2310          jumps by more than one.  This is typical for numbers inside a
2311          run of text with left-to-right embedding direction, but can
2312          also happen in other situations.  In those cases the decision
2313          where to continue after a level change, and in what direction,
2314          is tricky.  For example, given a text like below:
2315
2316                   abcdefgh
2317                   11336622
2318
2319          (where the numbers below the text show the resolved levels),
2320          the result of reordering according to UAX#9 should be this:
2321
2322                   efdcghba
2323
2324          This is implemented by the loop below which flips direction
2325          and jumps to the other edge of the level each time it finds
2326          the new level not to be the expected one.  The expected level
2327          is always one more or one less than the previous one.  */
2328       next_level = bidi_peek_at_next_level (bidi_it);
2329       while (next_level != expected_next_level)
2330         {
2331           expected_next_level += incr;
2332           level_to_search += incr;
2333           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2334           bidi_it->scan_dir = -bidi_it->scan_dir;
2335           next_level = bidi_peek_at_next_level (bidi_it);
2336         }
2337
2338       /* Finally, deliver the next character in the new direction.  */
2339       next_level = bidi_level_of_next_char (bidi_it);
2340     }
2341
2342   /* Take note when we have just processed the newline that precedes
2343      the end of the paragraph.  The next time we are about to be
2344      called, set_iterator_to_next will automatically reinit the
2345      paragraph direction, if needed.  We do this at the newline before
2346      the paragraph separator, because the next character might not be
2347      the first character of the next paragraph, due to the bidi
2348      reordering, whereas we _must_ know the paragraph base direction
2349      _before_ we process the paragraph's text, since the base
2350      direction affects the reordering.  */
2351   if (bidi_it->scan_dir == 1
2352       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2353     {
2354       /* The paragraph direction of the entire string, once
2355          determined, is in effect for the entire string.  Setting the
2356          separator limit to the end of the string prevents
2357          bidi_paragraph_init from being called automatically on this
2358          string.  */
2359       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2360         bidi_it->separator_limit = bidi_it->string.schars;
2361       else if (bidi_it->bytepos < ZV_BYTE)
2362         {
2363           EMACS_INT sep_len
2364             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2365                                      bidi_it->bytepos + bidi_it->ch_len);
2366           if (bidi_it->nchars <= 0)
2367             abort ();
2368           if (sep_len >= 0)
2369             {
2370               bidi_it->new_paragraph = 1;
2371               /* Record the buffer position of the last character of the
2372                  paragraph separator.  */
2373               bidi_it->separator_limit
2374                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2375             }
2376         }
2377     }
2378
2379   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2380     {
2381       /* If we are at paragraph's base embedding level and beyond the
2382          last cached position, the cache's job is done and we can
2383          discard it.  */
2384       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2385           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2386                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2387         bidi_cache_reset ();
2388         /* But as long as we are caching during forward scan, we must
2389            cache each state, or else the cache integrity will be
2390            compromised: it assumes cached states correspond to buffer
2391            positions 1:1.  */
2392       else
2393         bidi_cache_iterator_state (bidi_it, 1);
2394     }
2395
2396   if (STRINGP (bidi_it->string.lstring))
2397     UNGCPRO;
2398 }
2399
2400 /* This is meant to be called from within the debugger, whenever you
2401    wish to examine the cache contents.  */
2402 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2403 void
2404 bidi_dump_cached_states (void)
2405 {
2406   ptrdiff_t i;
2407   int ndigits = 1;
2408
2409   if (bidi_cache_idx == 0)
2410     {
2411       fprintf (stderr, "The cache is empty.\n");
2412       return;
2413     }
2414   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2415            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2416
2417   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2418     ndigits++;
2419   fputs ("ch  ", stderr);
2420   for (i = 0; i < bidi_cache_idx; i++)
2421     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2422   fputs ("\n", stderr);
2423   fputs ("lvl ", stderr);
2424   for (i = 0; i < bidi_cache_idx; i++)
2425     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2426   fputs ("\n", stderr);
2427   fputs ("pos ", stderr);
2428   for (i = 0; i < bidi_cache_idx; i++)
2429     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2430   fputs ("\n", stderr);
2431 }