src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2013 Free Software
   3    Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59
  60 #include "lisp.h"
  61 #include "character.h"
  62 #include "buffer.h"
  63 #include "dispextern.h"
  64
  65 static bool bidi_initialized = 0;
  66
  67 static Lisp_Object bidi_type_table, bidi_mirror_table;
  68
  69 #define LRM_CHAR   0x200E
  70 #define RLM_CHAR   0x200F
  71 #define BIDI_EOB   -1
  72
  73 /* Data type for describing the bidirectional character categories.  */
  74 typedef enum {
  75   UNKNOWN_BC,
  76   NEUTRAL,
  77   WEAK,
  78   STRONG
  79 } bidi_category_t;
  80
  81 /* UAX#9 says to search only for L, AL, or R types of characters, and
  82    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  83    level.  Yudit indeed ignores them.  This variable is therefore set
  84    by default to ignore them, but clearing it will take them into
  85    account.  */
  86 extern bool bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  87 bool bidi_ignore_explicit_marks_for_paragraph_level = 1;
  88
  89 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  90 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  91
  92 \f
  93 /***********************************************************************
  94                         Utilities
  95  ***********************************************************************/
  96
  97 /* Return the bidi type of a character CH, subject to the current
  98    directional OVERRIDE.  */
  99 static bidi_type_t
 100 bidi_get_type (int ch, bidi_dir_t override)
 101 {
 102   bidi_type_t default_type;
 103
 104   if (ch == BIDI_EOB)
 105     return NEUTRAL_B;
 106   if (ch < 0 || ch > MAX_CHAR)
 107     emacs_abort ();
 108
 109   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 110   /* Every valid character code, even those that are unassigned by the
 111      UCD, have some bidi-class property, according to
 112      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 113      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 114   if (default_type == UNKNOWN_BT)
 115     emacs_abort ();
 116
 117   if (override == NEUTRAL_DIR)
 118     return default_type;
 119
 120   switch (default_type)
 121     {
 122       /* Although UAX#9 does not tell, it doesn't make sense to
 123          override NEUTRAL_B and LRM/RLM characters.  */
 124       case NEUTRAL_B:
 125       case LRE:
 126       case LRO:
 127       case RLE:
 128       case RLO:
 129       case PDF:
 130         return default_type;
 131       default:
 132         switch (ch)
 133           {
 134             case LRM_CHAR:
 135             case RLM_CHAR:
 136               return default_type;
 137             default:
 138               if (override == L2R) /* X6 */
 139                 return STRONG_L;
 140               else if (override == R2L)
 141                 return STRONG_R;
 142               else
 143                 emacs_abort (); /* can't happen: handled above */
 144           }
 145     }
 146 }
 147
 148 static void
 149 bidi_check_type (bidi_type_t type)
 150 {
 151   eassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 152 }
 153
 154 /* Given a bidi TYPE of a character, return its category.  */
 155 static bidi_category_t
 156 bidi_get_category (bidi_type_t type)
 157 {
 158   switch (type)
 159     {
 160       case UNKNOWN_BT:
 161         return UNKNOWN_BC;
 162       case STRONG_L:
 163       case STRONG_R:
 164       case STRONG_AL:
 165       case LRE:
 166       case LRO:
 167       case RLE:
 168       case RLO:
 169         return STRONG;
 170       case PDF:         /* ??? really?? */
 171       case WEAK_EN:
 172       case WEAK_ES:
 173       case WEAK_ET:
 174       case WEAK_AN:
 175       case WEAK_CS:
 176       case WEAK_NSM:
 177       case WEAK_BN:
 178         return WEAK;
 179       case NEUTRAL_B:
 180       case NEUTRAL_S:
 181       case NEUTRAL_WS:
 182       case NEUTRAL_ON:
 183         return NEUTRAL;
 184       default:
 185         emacs_abort ();
 186     }
 187 }
 188
 189 /* Return the mirrored character of C, if it has one.  If C has no
 190    mirrored counterpart, return C.
 191    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 192    context must be tested by the caller.  */
 193 int
 194 bidi_mirror_char (int c)
 195 {
 196   Lisp_Object val;
 197
 198   if (c == BIDI_EOB)
 199     return c;
 200   if (c < 0 || c > MAX_CHAR)
 201     emacs_abort ();
 202
 203   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 204   if (INTEGERP (val))
 205     {
 206       int v;
 207
 208       /* When debugging, check before assigning to V, so that the check
 209          isn't broken by undefined behavior due to int overflow.  */
 210       eassert (CHAR_VALID_P (XINT (val)));
 211
 212       v = XINT (val);
 213
 214       /* Minimal test we must do in optimized builds, to prevent weird
 215          crashes further down the road.  */
 216       if (v < 0 || v > MAX_CHAR)
 217         emacs_abort ();
 218
 219       return v;
 220     }
 221
 222   return c;
 223 }
 224
 225 /* Determine the start-of-run (sor) directional type given the two
 226    embedding levels on either side of the run boundary.  Also, update
 227    the saved info about previously seen characters, since that info is
 228    generally valid for a single level run.  */
 229 static void
 230 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 231 {
 232   int higher_level = (level_before > level_after ? level_before : level_after);
 233
 234   /* The prev_was_pdf gork is required for when we have several PDFs
 235      in a row.  In that case, we want to compute the sor type for the
 236      next level run only once: when we see the first PDF.  That's
 237      because the sor type depends only on the higher of the two levels
 238      that we find on the two sides of the level boundary (see UAX#9,
 239      clause X10), and so we don't need to know the final embedding
 240      level to which we descend after processing all the PDFs.  */
 241   if (!bidi_it->prev_was_pdf || level_before < level_after)
 242     /* FIXME: should the default sor direction be user selectable?  */
 243     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 244   if (level_before > level_after)
 245     bidi_it->prev_was_pdf = 1;
 246
 247   bidi_it->prev.type = UNKNOWN_BT;
 248   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 249     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 250   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 251   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 252   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 253   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 254     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 255   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 256 }
 257
 258 /* Push the current embedding level and override status; reset the
 259    current level to LEVEL and the current override status to OVERRIDE.  */
 260 static void
 261 bidi_push_embedding_level (struct bidi_it *bidi_it,
 262                            int level, bidi_dir_t override)
 263 {
 264   bidi_it->stack_idx++;
 265   eassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 266   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 267   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 268 }
 269
 270 /* Pop the embedding level and directional override status from the
 271    stack, and return the new level.  */
 272 static int
 273 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 274 {
 275   /* UAX#9 says to ignore invalid PDFs.  */
 276   if (bidi_it->stack_idx > 0)
 277     bidi_it->stack_idx--;
 278   return bidi_it->level_stack[bidi_it->stack_idx].level;
 279 }
 280
 281 /* Record in SAVED_INFO the information about the current character.  */
 282 static void
 283 bidi_remember_char (struct bidi_saved_info *saved_info,
 284                     struct bidi_it *bidi_it)
 285 {
 286   saved_info->charpos = bidi_it->charpos;
 287   saved_info->bytepos = bidi_it->bytepos;
 288   saved_info->type = bidi_it->type;
 289   bidi_check_type (bidi_it->type);
 290   saved_info->type_after_w1 = bidi_it->type_after_w1;
 291   bidi_check_type (bidi_it->type_after_w1);
 292   saved_info->orig_type = bidi_it->orig_type;
 293   bidi_check_type (bidi_it->orig_type);
 294 }
 295
 296 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 297    copies the part of the level stack that is actually in use.  */
 298 static void
 299 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 300 {
 301   /* Copy everything from the start through the active part of
 302      the level stack.  */
 303   memcpy (to, from,
 304           (offsetof (struct bidi_it, level_stack[1])
 305            + from->stack_idx * sizeof from->level_stack[0]));
 306 }
 307
 308 \f
 309 /***********************************************************************
 310                         Caching the bidi iterator states
 311  ***********************************************************************/
 312
 313 #define BIDI_CACHE_CHUNK 200
 314 static struct bidi_it *bidi_cache;
 315 static ptrdiff_t bidi_cache_size = 0;
 316 enum { elsz = sizeof (struct bidi_it) };
 317 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 318 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 319 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 320                                            "stack" level */
 321
 322 /* 5-slot stack for saving the start of the previous level of the
 323    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 324    and we need the same size of our stack.  */
 325 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 326 static int bidi_cache_sp;
 327
 328 /* Size of header used by bidi_shelve_cache.  */
 329 enum
 330   {
 331     bidi_shelve_header_size
 332       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 333          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 334          + sizeof (bidi_cache_last_idx))
 335   };
 336
 337 /* Reset the cache state to the empty state.  We only reset the part
 338    of the cache relevant to iteration of the current object.  Previous
 339    objects, which are pushed on the display iterator's stack, are left
 340    intact.  This is called when the cached information is no more
 341    useful for the current iteration, e.g. when we were reseated to a
 342    new position on the same object.  */
 343 static void
 344 bidi_cache_reset (void)
 345 {
 346   bidi_cache_idx = bidi_cache_start;
 347   bidi_cache_last_idx = -1;
 348 }
 349
 350 /* Shrink the cache to its minimal size.  Called when we init the bidi
 351    iterator for reordering a buffer or a string that does not come
 352    from display properties, because that means all the previously
 353    cached info is of no further use.  */
 354 static void
 355 bidi_cache_shrink (void)
 356 {
 357   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 358     {
 359       bidi_cache = xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 360       bidi_cache_size = BIDI_CACHE_CHUNK;
 361     }
 362   bidi_cache_reset ();
 363 }
 364
 365 static void
 366 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 367 {
 368   int current_scan_dir = bidi_it->scan_dir;
 369
 370   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 371     emacs_abort ();
 372
 373   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 374   bidi_it->scan_dir = current_scan_dir;
 375   bidi_cache_last_idx = idx;
 376 }
 377
 378 /* Find a cached state with a given CHARPOS and resolved embedding
 379    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 380    resolved levels in cached states.  DIR, if non-zero, means search
 381    in that direction from the last cache hit.  */
 382 static ptrdiff_t
 383 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
 384 {
 385   ptrdiff_t i, i_start;
 386
 387   if (bidi_cache_idx > bidi_cache_start)
 388     {
 389       if (bidi_cache_last_idx == -1)
 390         bidi_cache_last_idx = bidi_cache_idx - 1;
 391       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 392         {
 393           dir = -1;
 394           i_start = bidi_cache_last_idx - 1;
 395         }
 396       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 397                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 398         {
 399           dir = 1;
 400           i_start = bidi_cache_last_idx + 1;
 401         }
 402       else if (dir)
 403         i_start = bidi_cache_last_idx;
 404       else
 405         {
 406           dir = -1;
 407           i_start = bidi_cache_idx - 1;
 408         }
 409
 410       if (dir < 0)
 411         {
 412           /* Linear search for now; FIXME!  */
 413           for (i = i_start; i >= bidi_cache_start; i--)
 414             if (bidi_cache[i].charpos <= charpos
 415                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 416                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 417               return i;
 418         }
 419       else
 420         {
 421           for (i = i_start; i < bidi_cache_idx; i++)
 422             if (bidi_cache[i].charpos <= charpos
 423                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 424                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 425               return i;
 426         }
 427     }
 428
 429   return -1;
 430 }
 431
 432 /* Find a cached state where the resolved level changes to a value
 433    that is lower than LEVEL, and return its cache slot index.  DIR is
 434    the direction to search, starting with the last used cache slot.
 435    If DIR is zero, we search backwards from the last occupied cache
 436    slot.  BEFORE means return the index of the slot that
 437    is ``before'' the level change in the search direction.  That is,
 438    given the cached levels like this:
 439
 440          1122333442211
 441           AB        C
 442
 443    and assuming we are at the position cached at the slot marked with
 444    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 445    index of slot B or A, depending whether BEFORE is, respectively,
 446    true or false.  */
 447 static ptrdiff_t
 448 bidi_cache_find_level_change (int level, int dir, bool before)
 449 {
 450   if (bidi_cache_idx)
 451     {
 452       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 453       int incr = before ? 1 : 0;
 454
 455       eassert (!dir || bidi_cache_last_idx >= 0);
 456
 457       if (!dir)
 458         dir = -1;
 459       else if (!incr)
 460         i += dir;
 461
 462       if (dir < 0)
 463         {
 464           while (i >= bidi_cache_start + incr)
 465             {
 466               if (bidi_cache[i - incr].resolved_level >= 0
 467                   && bidi_cache[i - incr].resolved_level < level)
 468                 return i;
 469               i--;
 470             }
 471         }
 472       else
 473         {
 474           while (i < bidi_cache_idx - incr)
 475             {
 476               if (bidi_cache[i + incr].resolved_level >= 0
 477                   && bidi_cache[i + incr].resolved_level < level)
 478                 return i;
 479               i++;
 480             }
 481         }
 482     }
 483
 484   return -1;
 485 }
 486
 487 static void
 488 bidi_cache_ensure_space (ptrdiff_t idx)
 489 {
 490   /* Enlarge the cache as needed.  */
 491   if (idx >= bidi_cache_size)
 492     {
 493       /* The bidi cache cannot be larger than the largest Lisp string
 494          or buffer.  */
 495       ptrdiff_t string_or_buffer_bound
 496         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 497
 498       /* Also, it cannot be larger than what C can represent.  */
 499       ptrdiff_t c_bound
 500         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 501
 502       bidi_cache
 503         = xpalloc (bidi_cache, &bidi_cache_size,
 504                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 505                    min (string_or_buffer_bound, c_bound), elsz);
 506     }
 507 }
 508
 509 static void
 510 bidi_cache_iterator_state (struct bidi_it *bidi_it, bool resolved)
 511 {
 512   ptrdiff_t idx;
 513
 514   /* We should never cache on backward scans.  */
 515   if (bidi_it->scan_dir == -1)
 516     emacs_abort ();
 517   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 518
 519   if (idx < 0)
 520     {
 521       idx = bidi_cache_idx;
 522       bidi_cache_ensure_space (idx);
 523       /* Character positions should correspond to cache positions 1:1.
 524          If we are outside the range of cached positions, the cache is
 525          useless and must be reset.  */
 526       if (idx > bidi_cache_start &&
 527           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 528                                + bidi_cache[idx - 1].nchars)
 529            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 530         {
 531           bidi_cache_reset ();
 532           idx = bidi_cache_start;
 533         }
 534       if (bidi_it->nchars <= 0)
 535         emacs_abort ();
 536       bidi_copy_it (&bidi_cache[idx], bidi_it);
 537       if (!resolved)
 538         bidi_cache[idx].resolved_level = -1;
 539     }
 540   else
 541     {
 542       /* Copy only the members which could have changed, to avoid
 543          costly copying of the entire struct.  */
 544       bidi_cache[idx].type = bidi_it->type;
 545       bidi_check_type (bidi_it->type);
 546       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 547       bidi_check_type (bidi_it->type_after_w1);
 548       if (resolved)
 549         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 550       else
 551         bidi_cache[idx].resolved_level = -1;
 552       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 553       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 554       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 555       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 556       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 557       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 558       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 559     }
 560
 561   bidi_cache_last_idx = idx;
 562   if (idx >= bidi_cache_idx)
 563     bidi_cache_idx = idx + 1;
 564 }
 565
 566 static bidi_type_t
 567 bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
 568 {
 569   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 570
 571   if (i >= bidi_cache_start)
 572     {
 573       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 574
 575       bidi_copy_it (bidi_it, &bidi_cache[i]);
 576       bidi_cache_last_idx = i;
 577       /* Don't let scan direction from the cached state override
 578          the current scan direction.  */
 579       bidi_it->scan_dir = current_scan_dir;
 580       return bidi_it->type;
 581     }
 582
 583   return UNKNOWN_BT;
 584 }
 585
 586 static int
 587 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 588 {
 589   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 590     emacs_abort ();
 591   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 592 }
 593
 594 \f
 595 /***********************************************************************
 596              Pushing and popping the bidi iterator state
 597  ***********************************************************************/
 598
 599 /* Push the bidi iterator state in preparation for reordering a
 600    different object, e.g. display string found at certain buffer
 601    position.  Pushing the bidi iterator boils down to saving its
 602    entire state on the cache and starting a new cache "stacked" on top
 603    of the current cache.  */
 604 void
 605 bidi_push_it (struct bidi_it *bidi_it)
 606 {
 607   /* Save the current iterator state in its entirety after the last
 608      used cache slot.  */
 609   bidi_cache_ensure_space (bidi_cache_idx);
 610   bidi_cache[bidi_cache_idx++] = *bidi_it;
 611
 612   /* Push the current cache start onto the stack.  */
 613   eassert (bidi_cache_sp < IT_STACK_SIZE);
 614   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 615
 616   /* Start a new level of cache, and make it empty.  */
 617   bidi_cache_start = bidi_cache_idx;
 618   bidi_cache_last_idx = -1;
 619 }
 620
 621 /* Restore the iterator state saved by bidi_push_it and return the
 622    cache to the corresponding state.  */
 623 void
 624 bidi_pop_it (struct bidi_it *bidi_it)
 625 {
 626   if (bidi_cache_start <= 0)
 627     emacs_abort ();
 628
 629   /* Reset the next free cache slot index to what it was before the
 630      call to bidi_push_it.  */
 631   bidi_cache_idx = bidi_cache_start - 1;
 632
 633   /* Restore the bidi iterator state saved in the cache.  */
 634   *bidi_it = bidi_cache[bidi_cache_idx];
 635
 636   /* Pop the previous cache start from the stack.  */
 637   if (bidi_cache_sp <= 0)
 638     emacs_abort ();
 639   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 640
 641   /* Invalidate the last-used cache slot data.  */
 642   bidi_cache_last_idx = -1;
 643 }
 644
 645 static ptrdiff_t bidi_cache_total_alloc;
 646
 647 /* Stash away a copy of the cache and its control variables.  */
 648 void *
 649 bidi_shelve_cache (void)
 650 {
 651   unsigned char *databuf;
 652   ptrdiff_t alloc;
 653
 654   /* Empty cache.  */
 655   if (bidi_cache_idx == 0)
 656     return NULL;
 657
 658   alloc = (bidi_shelve_header_size
 659            + bidi_cache_idx * sizeof (struct bidi_it));
 660   databuf = xmalloc (alloc);
 661   bidi_cache_total_alloc += alloc;
 662
 663   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 664   memcpy (databuf + sizeof (bidi_cache_idx),
 665           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 666   memcpy (databuf + sizeof (bidi_cache_idx)
 667           + bidi_cache_idx * sizeof (struct bidi_it),
 668           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 669   memcpy (databuf + sizeof (bidi_cache_idx)
 670           + bidi_cache_idx * sizeof (struct bidi_it)
 671           + sizeof (bidi_cache_start_stack),
 672           &bidi_cache_sp, sizeof (bidi_cache_sp));
 673   memcpy (databuf + sizeof (bidi_cache_idx)
 674           + bidi_cache_idx * sizeof (struct bidi_it)
 675           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 676           &bidi_cache_start, sizeof (bidi_cache_start));
 677   memcpy (databuf + sizeof (bidi_cache_idx)
 678           + bidi_cache_idx * sizeof (struct bidi_it)
 679           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 680           + sizeof (bidi_cache_start),
 681           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 682
 683   return databuf;
 684 }
 685
 686 /* Restore the cache state from a copy stashed away by
 687    bidi_shelve_cache, and free the buffer used to stash that copy.
 688    JUST_FREE means free the buffer, but don't restore the
 689    cache; used when the corresponding iterator is discarded instead of
 690    being restored.  */
 691 void
 692 bidi_unshelve_cache (void *databuf, bool just_free)
 693 {
 694   unsigned char *p = databuf;
 695
 696   if (!p)
 697     {
 698       if (!just_free)
 699         {
 700           /* A NULL pointer means an empty cache.  */
 701           bidi_cache_start = 0;
 702           bidi_cache_sp = 0;
 703           bidi_cache_reset ();
 704         }
 705     }
 706   else
 707     {
 708       if (just_free)
 709         {
 710           ptrdiff_t idx;
 711
 712           memcpy (&idx, p, sizeof (bidi_cache_idx));
 713           bidi_cache_total_alloc
 714             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 715         }
 716       else
 717         {
 718           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 719           bidi_cache_ensure_space (bidi_cache_idx);
 720           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 721                   bidi_cache_idx * sizeof (struct bidi_it));
 722           memcpy (bidi_cache_start_stack,
 723                   p + sizeof (bidi_cache_idx)
 724                   + bidi_cache_idx * sizeof (struct bidi_it),
 725                   sizeof (bidi_cache_start_stack));
 726           memcpy (&bidi_cache_sp,
 727                   p + sizeof (bidi_cache_idx)
 728                   + bidi_cache_idx * sizeof (struct bidi_it)
 729                   + sizeof (bidi_cache_start_stack),
 730                   sizeof (bidi_cache_sp));
 731           memcpy (&bidi_cache_start,
 732                   p + sizeof (bidi_cache_idx)
 733                   + bidi_cache_idx * sizeof (struct bidi_it)
 734                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 735                   sizeof (bidi_cache_start));
 736           memcpy (&bidi_cache_last_idx,
 737                   p + sizeof (bidi_cache_idx)
 738                   + bidi_cache_idx * sizeof (struct bidi_it)
 739                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 740                   + sizeof (bidi_cache_start),
 741                   sizeof (bidi_cache_last_idx));
 742           bidi_cache_total_alloc
 743             -= (bidi_shelve_header_size
 744                 + bidi_cache_idx * sizeof (struct bidi_it));
 745         }
 746
 747       xfree (p);
 748     }
 749 }
 750
 751 \f
 752 /***********************************************************************
 753                         Initialization
 754  ***********************************************************************/
 755 static void
 756 bidi_initialize (void)
 757 {
 758   bidi_type_table = uniprop_table (intern ("bidi-class"));
 759   if (NILP (bidi_type_table))
 760     emacs_abort ();
 761   staticpro (&bidi_type_table);
 762
 763   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 764   if (NILP (bidi_mirror_table))
 765     emacs_abort ();
 766   staticpro (&bidi_mirror_table);
 767
 768   Qparagraph_start = intern ("paragraph-start");
 769   staticpro (&Qparagraph_start);
 770   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 771   if (!STRINGP (paragraph_start_re))
 772     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 773   staticpro (&paragraph_start_re);
 774   Qparagraph_separate = intern ("paragraph-separate");
 775   staticpro (&Qparagraph_separate);
 776   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 777   if (!STRINGP (paragraph_separate_re))
 778     paragraph_separate_re = build_string ("[ \t\f]*$");
 779   staticpro (&paragraph_separate_re);
 780
 781   bidi_cache_sp = 0;
 782   bidi_cache_total_alloc = 0;
 783
 784   bidi_initialized = 1;
 785 }
 786
 787 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 788    end.  */
 789 static void
 790 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 791 {
 792   bidi_it->invalid_levels = 0;
 793   bidi_it->invalid_rl_levels = -1;
 794   bidi_it->stack_idx = 0;
 795   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 796 }
 797
 798 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 799 void
 800 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, bool frame_window_p,
 801               struct bidi_it *bidi_it)
 802 {
 803   if (! bidi_initialized)
 804     bidi_initialize ();
 805   if (charpos >= 0)
 806     bidi_it->charpos = charpos;
 807   if (bytepos >= 0)
 808     bidi_it->bytepos = bytepos;
 809   bidi_it->frame_window_p = frame_window_p;
 810   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 811   bidi_it->first_elt = 1;
 812   bidi_set_paragraph_end (bidi_it);
 813   bidi_it->new_paragraph = 1;
 814   bidi_it->separator_limit = -1;
 815   bidi_it->type = NEUTRAL_B;
 816   bidi_it->type_after_w1 = NEUTRAL_B;
 817   bidi_it->orig_type = NEUTRAL_B;
 818   bidi_it->prev_was_pdf = 0;
 819   bidi_it->prev.type = bidi_it->prev.type_after_w1
 820     = bidi_it->prev.orig_type = UNKNOWN_BT;
 821   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 822     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 823   bidi_it->next_for_neutral.charpos = -1;
 824   bidi_it->next_for_neutral.type
 825     = bidi_it->next_for_neutral.type_after_w1
 826     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 827   bidi_it->prev_for_neutral.charpos = -1;
 828   bidi_it->prev_for_neutral.type
 829     = bidi_it->prev_for_neutral.type_after_w1
 830     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 831   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 832   bidi_it->disp_pos = -1;       /* invalid/unknown */
 833   bidi_it->disp_prop = 0;
 834   /* We can only shrink the cache if we are at the bottom level of its
 835      "stack".  */
 836   if (bidi_cache_start == 0)
 837     bidi_cache_shrink ();
 838   else
 839     bidi_cache_reset ();
 840 }
 841
 842 /* Perform initializations for reordering a new line of bidi text.  */
 843 static void
 844 bidi_line_init (struct bidi_it *bidi_it)
 845 {
 846   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 847   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 848   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 849   bidi_it->invalid_levels = 0;
 850   bidi_it->invalid_rl_levels = -1;
 851   /* Setting this to zero will force its recomputation the first time
 852      we need it for W5.  */
 853   bidi_it->next_en_pos = 0;
 854   bidi_it->next_en_type = UNKNOWN_BT;
 855   bidi_it->next_for_ws.type = UNKNOWN_BT;
 856   bidi_set_sor_type (bidi_it,
 857                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 858                      bidi_it->level_stack[0].level); /* X10 */
 859
 860   bidi_cache_reset ();
 861 }
 862
 863 \f
 864 /***********************************************************************
 865                         Fetching characters
 866  ***********************************************************************/
 867
 868 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 869    are zero-based character positions in S, BEGBYTE is byte position
 870    corresponding to BEG.  UNIBYTE means S is a unibyte string.  */
 871 static ptrdiff_t
 872 bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
 873                   const ptrdiff_t begbyte, const ptrdiff_t end, bool unibyte)
 874 {
 875   ptrdiff_t pos = beg;
 876   const unsigned char *p = s + begbyte, *start = p;
 877
 878   if (unibyte)
 879     p = s + end;
 880   else
 881     {
 882       if (!CHAR_HEAD_P (*p))
 883         emacs_abort ();
 884
 885       while (pos < end)
 886         {
 887           p += BYTES_BY_CHAR_HEAD (*p);
 888           pos++;
 889         }
 890     }
 891
 892   return p - start;
 893 }
 894
 895 /* Fetch and return the character at byte position BYTEPOS.  If S is
 896    non-NULL, fetch the character from string S; otherwise fetch the
 897    character from the current buffer.  UNIBYTE means S is a
 898    unibyte string.  */
 899 static int
 900 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, bool unibyte)
 901 {
 902   if (s)
 903     {
 904       s += bytepos;
 905       if (unibyte)
 906         return *s;
 907     }
 908   else
 909     s = BYTE_POS_ADDR (bytepos);
 910   return STRING_CHAR (s);
 911 }
 912
 913 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 914    character is covered by a display string, treat the entire run of
 915    covered characters as a single character, either u+2029 or u+FFFC,
 916    and return their combined length in CH_LEN and NCHARS.  DISP_POS
 917    specifies the character position of the next display string, or -1
 918    if not yet computed.  When the next character is at or beyond that
 919    position, the function updates DISP_POS with the position of the
 920    next display string.  *DISP_PROP non-zero means that there's really
 921    a display string at DISP_POS, as opposed to when we searched till
 922    DISP_POS without finding one.  If *DISP_PROP is 2, it means the
 923    display spec is of the form `(space ...)', which is replaced with
 924    u+2029 to handle it as a paragraph separator.  STRING->s is the C
 925    string to iterate, or NULL if iterating over a buffer or a Lisp
 926    string; in the latter case, STRING->lstring is the Lisp string.  */
 927 static int
 928 bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
 929                  int *disp_prop, struct bidi_string_data *string,
 930                  bool frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
 931 {
 932   int ch;
 933   ptrdiff_t endpos
 934     = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 935   struct text_pos pos;
 936   int len;
 937
 938   /* If we got past the last known position of display string, compute
 939      the position of the next one.  That position could be at CHARPOS.  */
 940   if (charpos < endpos && charpos > *disp_pos)
 941     {
 942       SET_TEXT_POS (pos, charpos, bytepos);
 943       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 944                                               disp_prop);
 945     }
 946
 947   /* Fetch the character at BYTEPOS.  */
 948   if (charpos >= endpos)
 949     {
 950       ch = BIDI_EOB;
 951       *ch_len = 1;
 952       *nchars = 1;
 953       *disp_pos = endpos;
 954       *disp_prop = 0;
 955     }
 956   else if (charpos >= *disp_pos && *disp_prop)
 957     {
 958       ptrdiff_t disp_end_pos;
 959
 960       /* We don't expect to find ourselves in the middle of a display
 961          property.  Hopefully, it will never be needed.  */
 962       if (charpos > *disp_pos)
 963         emacs_abort ();
 964       /* Text covered by `display' properties and overlays with
 965          display properties or display strings is handled as a single
 966          character that represents the entire run of characters
 967          covered by the display property.  */
 968       if (*disp_prop == 2)
 969         {
 970           /* `(space ...)' display specs are handled as paragraph
 971              separators for the purposes of the reordering; see UAX#9
 972              section 3 and clause HL1 in section 4.3 there.  */
 973           ch = 0x2029;
 974         }
 975       else
 976         {
 977           /* All other display specs are handled as the Unicode Object
 978              Replacement Character.  */
 979           ch = 0xFFFC;
 980         }
 981       disp_end_pos = compute_display_string_end (*disp_pos, string);
 982       if (disp_end_pos < 0)
 983         {
 984           /* Somebody removed the display string from the buffer
 985              behind our back.  Recover by processing this buffer
 986              position as if no display property were present there to
 987              begin with.  */
 988           *disp_prop = 0;
 989           goto normal_char;
 990         }
 991       *nchars = disp_end_pos - *disp_pos;
 992       if (*nchars <= 0)
 993         emacs_abort ();
 994       if (string->s)
 995         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 996                                     disp_end_pos, string->unibyte);
 997       else if (STRINGP (string->lstring))
 998         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 999                                     bytepos, disp_end_pos, string->unibyte);
1000       else
1001         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
1002     }
1003   else
1004     {
1005     normal_char:
1006       if (string->s)
1007         {
1008
1009           if (!string->unibyte)
1010             {
1011               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
1012               *ch_len = len;
1013             }
1014           else
1015             {
1016               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
1017               *ch_len = 1;
1018             }
1019         }
1020       else if (STRINGP (string->lstring))
1021         {
1022           if (!string->unibyte)
1023             {
1024               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1025                                            len);
1026               *ch_len = len;
1027             }
1028           else
1029             {
1030               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1031               *ch_len = 1;
1032             }
1033         }
1034       else
1035         {
1036           ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
1037           *ch_len = len;
1038         }
1039       *nchars = 1;
1040     }
1041
1042   /* If we just entered a run of characters covered by a display
1043      string, compute the position of the next display string.  */
1044   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1045       && *disp_prop)
1046     {
1047       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1048       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1049                                               disp_prop);
1050     }
1051
1052   return ch;
1053 }
1054
1055 \f
1056 /***********************************************************************
1057                         Determining paragraph direction
1058  ***********************************************************************/
1059
1060 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1061    Value is the non-negative length of the paragraph separator
1062    following the buffer position, -1 if position is at the beginning
1063    of a new paragraph, or -2 if position is neither at beginning nor
1064    at end of a paragraph.  */
1065 static ptrdiff_t
1066 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1067 {
1068   Lisp_Object sep_re;
1069   Lisp_Object start_re;
1070   ptrdiff_t val;
1071
1072   sep_re = paragraph_separate_re;
1073   start_re = paragraph_start_re;
1074
1075   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1076   if (val < 0)
1077     {
1078       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1079         val = -1;
1080       else
1081         val = -2;
1082     }
1083
1084   return val;
1085 }
1086
1087 /* On my 2005-vintage machine, searching back for paragraph start
1088    takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
1089    when user types C-p.  The number below limits each call to
1090    bidi_paragraph_init to about 10 ms.  */
1091 #define MAX_PARAGRAPH_SEARCH 7500
1092
1093 /* Find the beginning of this paragraph by looking back in the buffer.
1094    Value is the byte position of the paragraph's beginning, or
1095    BEGV_BYTE if paragraph_start_re is still not found after looking
1096    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1097 static ptrdiff_t
1098 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1099 {
1100   Lisp_Object re = paragraph_start_re;
1101   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1102   ptrdiff_t n = 0;
1103
1104   while (pos_byte > BEGV_BYTE
1105          && n++ < MAX_PARAGRAPH_SEARCH
1106          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1107     /* FIXME: What if the paragraph beginning is covered by a
1108        display string?  And what if a display string covering some
1109        of the text over which we scan back includes
1110        paragraph_start_re?  */
1111     pos = find_next_newline (pos - 1, -1, &pos_byte);
1112   if (n >= MAX_PARAGRAPH_SEARCH)
1113     pos_byte = BEGV_BYTE;
1114   return pos_byte;
1115 }
1116
/* On a 3.4 GHz machine, searching forward for a strong directional
   character in a long paragraph full of weaks or neutrals takes about
   1 ms for each 20K characters.  The number below limits each call to
   bidi_paragraph_init to less than 10 ms even on slow machines.  */
#define MAX_STRONG_CHAR_SEARCH 100000

/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   BIDI_IT is the iterator whose position identifies the paragraph.
   On return its paragraph_dir and level_stack[0].level are set
   (level 1 for R2L, 0 otherwise), and bidi_line_init has been called
   on it.  The one exception is when DIR is NEUTRAL_DIR and the
   iterator is still inside a paragraph separator: then the function
   returns early and leaves the previous paragraph's state alone.

   NO_DEFAULT_P means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   Aborts if called at EOB of a non-empty buffer/string, before BEGV,
   or with a DIR that is none of L2R, R2L and NEUTRAL_DIR.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
{
  ptrdiff_t bytepos = bidi_it->bytepos;
  /* True when iterating over a C string or a Lisp string rather than
     over the current buffer.  */
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);
  ptrdiff_t pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
  ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    emacs_abort ();

  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      ptrdiff_t ch_len, nchars;
      /* disp_pos and disp_prop are the display-string bookkeeping
         that bidi_fetch_char maintains across calls; -1 and 0 mean
         "not computed yet".  */
      ptrdiff_t pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      s = (STRINGP (bidi_it->string.lstring)
           ? SDATA (bidi_it->string.lstring)
           : bidi_it->string.s);
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P,
         and only if we are iterating on a buffer.  */
      do {
        /* Position of the paragraph's first character; used to bound
           the forward search by MAX_STRONG_CHAR_SEARCH.  */
        ptrdiff_t pos1;

        bytepos = pstartbyte;
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                              &bidi_it->string,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        pos1 = pos;
        /* Scan forward until a strong character is found.  When
           bidi_ignore_explicit_marks_for_paragraph_level is set, the
           explicit embedding/override controls do not stop the
           scan.  */
        for (pos += nchars, bytepos += ch_len;
             ((bidi_get_category (type) != STRONG)
              || (bidi_ignore_explicit_marks_for_paragraph_level
                  && (type == RLE || type == RLO
                      || type == LRE || type == LRO)))
               /* Stop when searched too far into an abnormally large
                  paragraph full of weak or neutral characters.  */
               && pos - pos1 < MAX_STRONG_CHAR_SEARCH;
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, a separator character that ends the
               paragraph or begins a new one terminates the scan.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (bytepos, pos, &disp_pos,
                                  &disp_prop, &bidi_it->string,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* Translate the type that stopped the scan into a paragraph
           direction; any other type leaves paragraph_dir as it
           was.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                ptrdiff_t prevpbyte = pstartbyte;
                ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    p--;
                    pbyte = CHAR_TO_BYTE (p);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                /* Retry the scan from the paragraph found above.  */
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    emacs_abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* The base embedding level is 1 for R2L paragraphs, 0 otherwise.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1307
1308 \f
1309 /***********************************************************************
1310                  Resolving explicit and implicit levels.
1311   The rest of this file constitutes the core of the UBA implementation.
1312  ***********************************************************************/
1313
1314 static bool
1315 bidi_explicit_dir_char (int ch)
1316 {
1317   bidi_type_t ch_type;
1318
1319   if (!bidi_initialized)
1320     emacs_abort ();
1321   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1322   return (ch_type == LRE || ch_type == LRO
1323           || ch_type == RLE || ch_type == RLO
1324           || ch_type == PDF);
1325 }
1326
/* A helper function for bidi_resolve_explicit.  It advances to the
   next character in logical order and determines the new embedding
   level and directional override, but does not take into account
   empty embeddings.

   On return, BIDI_IT->ch holds the fetched character (BIDI_EOB at end
   of buffer/string), BIDI_IT->orig_type its unmodified bidi type,
   BIDI_IT->type the type after the explicit controls were turned
   into WEAK_BN (or WEAK_EN, see below), and BIDI_IT->type_after_w1
   the original type of an explicit control, or UNKNOWN_BT for any
   other character.  Value is the embedding level in effect after
   this character was processed.  */
static int
bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
{
  int curchar;
  bidi_type_t type;
  int current_level;
  int new_level;
  bidi_dir_t override;
  /* True when iterating over a C string or a Lisp string rather than
     over the current buffer.  */
  bool string_p = bidi_it->string.s || STRINGP (bidi_it->string.lstring);

  /* If reseat()'ed, don't advance, so as to start iteration from the
     position where we were reseated.  bidi_it->bytepos can be less
     than BEGV_BYTE after reseat to BEGV.  */
  if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
      || bidi_it->first_elt)
    {
      bidi_it->first_elt = 0;
      if (string_p)
        {
          const unsigned char *p
            = (STRINGP (bidi_it->string.lstring)
               ? SDATA (bidi_it->string.lstring)
               : bidi_it->string.s);

          /* Clamp a negative position to the start of the string.  */
          if (bidi_it->charpos < 0)
            bidi_it->charpos = bidi_it->bytepos = 0;
          eassert (bidi_it->bytepos == bidi_count_bytes (p, 0, 0,
                                                         bidi_it->charpos,
                                                         bidi_it->string.unibyte));
        }
      else
        {
          /* Clamp a position before BEGV to BEGV.  */
          if (bidi_it->charpos < BEGV)
            {
              bidi_it->charpos = BEGV;
              bidi_it->bytepos = BEGV_BYTE;
            }
          eassert (bidi_it->bytepos == CHAR_TO_BYTE (bidi_it->charpos));
        }
    }
  /* Don't move at end of buffer/string.  */
  else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
    {
      /* Advance to the next character, skipping characters covered by
         display strings (nchars > 1).  */
      if (bidi_it->nchars <= 0)
        emacs_abort ();
      bidi_it->charpos += bidi_it->nchars;
      if (bidi_it->ch_len == 0)
        emacs_abort ();
      bidi_it->bytepos += bidi_it->ch_len;
    }

  /* Start from the level and override at the top of the embedding
     stack; the explicit controls handled below may change them.  */
  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  override = bidi_it->level_stack[bidi_it->stack_idx].override;
  new_level = current_level;

  if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
    {
      /* At end of buffer/string: report BIDI_EOB without fetching.  */
      curchar = BIDI_EOB;
      bidi_it->ch_len = 1;
      bidi_it->nchars = 1;
      bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
      bidi_it->disp_prop = 0;
    }
  else
    {
      /* Fetch the character at BYTEPOS.  If it is covered by a
         display string, treat the entire run of covered characters as
         a single character u+FFFC.  */
      curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
                                 &bidi_it->disp_pos, &bidi_it->disp_prop,
                                 &bidi_it->string, bidi_it->frame_window_p,
                                 &bidi_it->ch_len, &bidi_it->nchars);
    }
  bidi_it->ch = curchar;

  /* Don't apply directional override here, as all the types we handle
     below will not be affected by the override anyway, and we need
     the original type unaltered.  The override will be applied in
     bidi_resolve_weak.  */
  type = bidi_get_type (curchar, NEUTRAL_DIR);
  bidi_it->orig_type = type;
  bidi_check_type (bidi_it->orig_type);

  if (type != PDF)
    bidi_it->prev_was_pdf = 0;

  bidi_it->type_after_w1 = UNKNOWN_BT;

  /* In each case below, a control is acted upon only while
     ignore_bn_limit is negative.  bidi_resolve_explicit makes it
     non-negative after finding that the control opens an empty
     embedding; such controls leave the level stack alone and only
     get the W5/Retaining treatment.  */
  switch (type)
    {
      case RLE: /* X2 */
      case RLO: /* X4 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 4)
              {
                /* Compute the least odd embedding level greater than
                   the current level.  */
                new_level = ((current_level + 1) & ~1) + 1;
                if (bidi_it->type_after_w1 == RLE)
                  override = NEUTRAL_DIR;
                else
                  override = R2L;
                if (current_level == BIDI_MAXLEVEL - 4)
                  bidi_it->invalid_rl_levels = 0;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                /* The level would be too deep: count the control as
                   invalid instead of pushing it.  */
                bidi_it->invalid_levels++;
                /* See the commentary about invalid_rl_levels below.  */
                if (bidi_it->invalid_rl_levels < 0)
                  bidi_it->invalid_rl_levels = 0;
                bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case LRE: /* X3 */
      case LRO: /* X5 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 5)
              {
                /* Compute the least even embedding level greater than
                   the current level.  */
                new_level = ((current_level + 2) & ~1);
                if (bidi_it->type_after_w1 == LRE)
                  override = NEUTRAL_DIR;
                else
                  override = L2R;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                bidi_it->invalid_levels++;
                /* invalid_rl_levels counts invalid levels encountered
                   while the embedding level was already too high for
                   LRE/LRO, but not for RLE/RLO.  That is because
                   there may be exactly one PDF which we should not
                   ignore even though invalid_levels is non-zero.
                   invalid_rl_levels helps to know what PDF is
                   that.  */
                if (bidi_it->invalid_rl_levels >= 0)
                  bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case PDF: /* X7 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* invalid_rl_levels == 0 marks the one PDF that must be
               honored even though invalid_levels may be non-zero; see
               the commentary about invalid_rl_levels above.  */
            if (!bidi_it->invalid_rl_levels)
              {
                new_level = bidi_pop_embedding_level (bidi_it);
                bidi_it->invalid_rl_levels = -1;
                if (bidi_it->invalid_levels)
                  bidi_it->invalid_levels--;
                /* else nothing: UAX#9 says to ignore invalid PDFs */
              }
            /* With no invalid levels outstanding, this PDF closes a
               valid embedding; otherwise it only cancels one of the
               invalid ones.  */
            if (!bidi_it->invalid_levels)
              new_level = bidi_pop_embedding_level (bidi_it);
            else
              {
                bidi_it->invalid_levels--;
                bidi_it->invalid_rl_levels--;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      default:
        /* Nothing.  */
        break;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  return new_level;
}
1531
1532 /* Given an iterator state in BIDI_IT, advance one character position
1533    in the buffer/string to the next character (in the logical order),
1534    resolve any explicit embeddings and directional overrides, and
1535    return the embedding level of the character after resolving
1536    explicit directives and ignoring empty embeddings.  */
1537 static int
1538 bidi_resolve_explicit (struct bidi_it *bidi_it)
1539 {
1540   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1541   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1542   ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1543   const unsigned char *s
1544     = (STRINGP (bidi_it->string.lstring)
1545        ? SDATA (bidi_it->string.lstring)
1546        : bidi_it->string.s);
1547
1548   if (prev_level < new_level
1549       && bidi_it->type == WEAK_BN
1550       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1551       && bidi_it->charpos < eob         /* not already at EOB */
1552       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1553                                                    + bidi_it->ch_len, s,
1554                                                    bidi_it->string.unibyte)))
1555     {
1556       /* Avoid pushing and popping embedding levels if the level run
1557          is empty, as this breaks level runs where it shouldn't.
1558          UAX#9 removes all the explicit embedding and override codes,
1559          so empty embeddings disappear without a trace.  We need to
1560          behave as if we did the same.  */
1561       struct bidi_it saved_it;
1562       int level = prev_level;
1563
1564       bidi_copy_it (&saved_it, bidi_it);
1565
1566       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1567                                                        + bidi_it->ch_len, s,
1568                                                        bidi_it->string.unibyte)))
1569         {
1570           /* This advances to the next character, skipping any
1571              characters covered by display strings.  */
1572           level = bidi_resolve_explicit_1 (bidi_it);
1573           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1574              a pointer to its data is no longer valid.  */
1575           if (STRINGP (bidi_it->string.lstring))
1576             s = SDATA (bidi_it->string.lstring);
1577         }
1578
1579       if (bidi_it->nchars <= 0)
1580         emacs_abort ();
1581       if (level == prev_level)  /* empty embedding */
1582         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1583       else                      /* this embedding is non-empty */
1584         saved_it.ignore_bn_limit = -2;
1585
1586       bidi_copy_it (bidi_it, &saved_it);
1587       if (bidi_it->ignore_bn_limit > -1)
1588         {
1589           /* We pushed a level, but we shouldn't have.  Undo that. */
1590           if (!bidi_it->invalid_rl_levels)
1591             {
1592               new_level = bidi_pop_embedding_level (bidi_it);
1593               bidi_it->invalid_rl_levels = -1;
1594               if (bidi_it->invalid_levels)
1595                 bidi_it->invalid_levels--;
1596             }
1597           if (!bidi_it->invalid_levels)
1598             new_level = bidi_pop_embedding_level (bidi_it);
1599           else
1600             {
1601               bidi_it->invalid_levels--;
1602               bidi_it->invalid_rl_levels--;
1603             }
1604         }
1605     }
1606
1607   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1608     {
1609       bidi_set_paragraph_end (bidi_it);
1610       /* This is needed by bidi_resolve_weak below, and in L1.  */
1611       bidi_it->type_after_w1 = bidi_it->type;
1612       bidi_check_type (bidi_it->type_after_w1);
1613     }
1614
1615   return new_level;
1616 }
1617
1618 /* Advance BIDI_IT one character in the buffer/string, resolve explicit
1619    levels and weak types (UAX#9 W1-W7), and return the resulting type.  */
1620 static bidi_type_t
1621 bidi_resolve_weak (struct bidi_it *bidi_it)
1622 {
1623   bidi_type_t type;
1624   bidi_dir_t override;
1625   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1626   int new_level  = bidi_resolve_explicit (bidi_it);  /* moves BIDI_IT */
1627   int next_char;
1628   bidi_type_t type_of_next;
1629   struct bidi_it saved_it;
1630   ptrdiff_t eob
1631     = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1632        ? bidi_it->string.schars : ZV);  /* where the scanned text ends */
1633
1634   type = bidi_it->type;
1635   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1636
1637   if (type == UNKNOWN_BT
1638       || type == LRE
1639       || type == LRO
1640       || type == RLE
1641       || type == RLO
1642       || type == PDF)
1643     emacs_abort ();  /* such types must not reach us */
1644
1645   if (new_level != prev_level
1646       || bidi_it->type == NEUTRAL_B)
1647     {
1648       /* We've got a new embedding level run, compute the directional
1649          type of sor and initialize per-run variables (UAX#9, clause
1650          X10).  */
1651       bidi_set_sor_type (bidi_it, prev_level, new_level);
1652     }
1653   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1654            || type == WEAK_BN || type == STRONG_AL)
1655     bidi_it->type_after_w1 = type;      /* needed in L1 */
1656   bidi_check_type (bidi_it->type_after_w1);
1657
1658   /* Level and directional override status are already recorded in
1659      bidi_it, and do not need any change; see X6.  */
1660   if (override == R2L)          /* X6 */
1661     type = STRONG_R;
1662   else if (override == L2R)
1663     type = STRONG_L;
1664   else
1665     {
1666       if (type == WEAK_NSM)     /* W1 */
1667         {
1668           /* Note that we don't need to consider the case where the
1669              prev character has its type overridden by an RLO or LRO,
1670              because then either the type of this NSM would have been
1671              also overridden, or the previous character is outside the
1672              current level run, and thus not relevant to this NSM.
1673              This is why NSM gets the type_after_w1 of the previous
1674              character.  */
1675           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1676               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1677               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1678             type = bidi_it->prev.type_after_w1;
1679           else if (bidi_it->sor == R2L)
1680             type = STRONG_R;
1681           else if (bidi_it->sor == L2R)
1682             type = STRONG_L;
1683           else /* shouldn't happen! */
1684             emacs_abort ();
1685         }
1686       if (type == WEAK_EN       /* W2 */
1687           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1688         type = WEAK_AN;
1689       else if (type == STRONG_AL) /* W3 */
1690         type = STRONG_R;
1691       else if ((type == WEAK_ES /* W4 */
1692                 && bidi_it->prev.type_after_w1 == WEAK_EN
1693                 && bidi_it->prev.orig_type == WEAK_EN)
1694                || (type == WEAK_CS
1695                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1696                         && bidi_it->prev.orig_type == WEAK_EN)
1697                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1698         {
1699           const unsigned char *s
1700             = (STRINGP (bidi_it->string.lstring)
1701                ? SDATA (bidi_it->string.lstring)
1702                : bidi_it->string.s);
1703
1704           next_char = (bidi_it->charpos + bidi_it->nchars >= eob
1705                        ? BIDI_EOB
1706                        : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
1707                                            s, bidi_it->string.unibyte));
1708           type_of_next = bidi_get_type (next_char, override);
1709
1710           if (type_of_next == WEAK_BN
1711               || bidi_explicit_dir_char (next_char))
1712             {
1713               bidi_copy_it (&saved_it, bidi_it);
1714               while (bidi_resolve_explicit (bidi_it) == new_level
1715                      && bidi_it->type == WEAK_BN)
1716                 ;
1717               type_of_next = bidi_it->type;
1718               bidi_copy_it (bidi_it, &saved_it);
1719             }
1720
1721           /* If the next character is EN, but the last strong-type
1722              character is AL, that next EN will be changed to AN when
1723              we process it in W2 above.  So in that case, this ES
1724              should not be changed into EN.  */
1725           if (type == WEAK_ES
1726               && type_of_next == WEAK_EN
1727               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1728             type = WEAK_EN;
1729           else if (type == WEAK_CS)
1730             {
1731               if (bidi_it->prev.type_after_w1 == WEAK_AN
1732                   && (type_of_next == WEAK_AN
1733                       /* If the next character is EN, but the last
1734                          strong-type character is AL, EN will be later
1735                          changed to AN when we process it in W2 above.
1736                          So in that case, this CS sits between two ANs
1737                          and should become AN as well.  */
1738                       || (type_of_next == WEAK_EN
1739                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1740                 type = WEAK_AN;
1741               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1742                        && type_of_next == WEAK_EN
1743                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1744                 type = WEAK_EN;
1745             }
1746         }
1747       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1748                || type == WEAK_BN)      /* W5/Retaining */
1749         {
1750           if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
1751             type = WEAK_EN;
1752           else if (bidi_it->next_en_pos > bidi_it->charpos
1753                    && bidi_it->next_en_type != WEAK_BN)
1754             {
1755               if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
1756                 type = WEAK_EN;
1757             }
1758           else if (bidi_it->next_en_pos >=0)
1759             {
1760               ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
1761               const unsigned char *s = (STRINGP (bidi_it->string.lstring)
1762                                         ? SDATA (bidi_it->string.lstring)
1763                                         : bidi_it->string.s);
1764
1765               if (bidi_it->nchars <= 0)
1766                 emacs_abort ();
1767               next_char
1768                 = (bidi_it->charpos + bidi_it->nchars >= eob
1769                    ? BIDI_EOB
1770                    : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1771                                        bidi_it->string.unibyte));
1772               type_of_next = bidi_get_type (next_char, override);
1773
1774               if (type_of_next == WEAK_ET
1775                   || type_of_next == WEAK_BN
1776                   || bidi_explicit_dir_char (next_char))
1777                 {
1778                   bidi_copy_it (&saved_it, bidi_it);
1779                   while (bidi_resolve_explicit (bidi_it) == new_level
1780                          && (bidi_it->type == WEAK_BN
1781                              || bidi_it->type == WEAK_ET))
1782                     ;
1783                   type_of_next = bidi_it->type;
1784                   en_pos = bidi_it->charpos;
1785                   bidi_copy_it (bidi_it, &saved_it);
1786                 }
1787               /* Remember this position, to speed up processing of the
1788                  next ETs.  */
1789               bidi_it->next_en_pos = en_pos;
1790               if (type_of_next == WEAK_EN)
1791                 {
1792                   /* If the last strong character is AL, the EN we've
1793                      found will become AN when we get to it (W2). */
1794                   if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
1795                     type_of_next = WEAK_AN;
1796                   else if (type == WEAK_BN)
1797                     type = NEUTRAL_ON; /* W6/Retaining */
1798                   else
1799                     type = WEAK_EN;
1800                 }
1801               else if (type_of_next == NEUTRAL_B)
1802                 /* Record the fact that there are no more ENs from
1803                    here to the end of paragraph, to avoid entering the
1804                    loop above ever again in this paragraph.  */
1805                 bidi_it->next_en_pos = -1;
1806               /* Record the type of the character where we ended our search.  */
1807               bidi_it->next_en_type = type_of_next;
1808             }
1809         }
1810     }
1811
1812   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1813       || (type == WEAK_BN
1814           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1815               || bidi_it->prev.type_after_w1 == WEAK_ES
1816               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1817     type = NEUTRAL_ON;
1818
1819   /* Store the type we've got so far, before we clobber it with strong
1820      types in W7 and while resolving neutral types.  But leave alone
1821      the original types that were recorded above, because we will need
1822      them for the L1 clause.  */
1823   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1824     bidi_it->type_after_w1 = type;
1825   bidi_check_type (bidi_it->type_after_w1);
1826
1827   if (type == WEAK_EN)  /* W7 */
1828     {
1829       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1830           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1831         type = STRONG_L;
1832     }
1833
1834   bidi_it->type = type;
1835   bidi_check_type (bidi_it->type);
1836   return type;
1837 }
1838
1839 /* Pick the strong type for a neutral that sits between PREV_TYPE and
1840    NEXT_TYPE on embedding level LEV (UAX#9 rules N1 and N2).  */
1841 static bidi_type_t
1842 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1843 {
1844   /* For N1, EN and AN on either side count as R.  */
1845   bidi_type_t before
1846     = (prev_type == WEAK_EN || prev_type == WEAK_AN) ? STRONG_R : prev_type;
1847   bidi_type_t after
1848     = (next_type == WEAK_EN || next_type == WEAK_AN) ? STRONG_R : next_type;
1849
1850   /* N1: both sides agree, so the neutral takes their type.  */
1851   if (after == before)
1852     return after;
1853
1854   /* N2: otherwise an even level yields L and an odd level yields R.  */
1855   return (lev & 1) ? STRONG_R : STRONG_L;
1856 }
1857
/* Advance BIDI_IT by one character via bidi_resolve_weak.  If that
   yields a neutral type other than NEUTRAL_B, or a BN on an unchanged
   embedding level, resolve it with bidi_resolve_neutral_1 from the
   types of the text before and after it, scanning ahead when the
   following type is not yet known.  Return the resulting type.  */
1858 static bidi_type_t
1859 bidi_resolve_neutral (struct bidi_it *bidi_it)
1860 {
1861   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1862   bidi_type_t type = bidi_resolve_weak (bidi_it);
1863   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1864
1865   if (!(type == STRONG_R
1866         || type == STRONG_L
1867         || type == WEAK_BN
1868         || type == WEAK_EN
1869         || type == WEAK_AN
1870         || type == NEUTRAL_B
1871         || type == NEUTRAL_S
1872         || type == NEUTRAL_WS
1873         || type == NEUTRAL_ON))
1874     emacs_abort ();
1875
1876   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1877                             we are already at paragraph end.  */
1878        && bidi_get_category (type) == NEUTRAL)
1879       || (type == WEAK_BN && prev_level == current_level))
1880     {
1881       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1882         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1883                                        bidi_it->next_for_neutral.type,
1884                                        current_level);
1885       /* The next two "else if" clauses are shortcuts for the
1886          important special case when we have a long sequence of
1887          neutral or WEAK_BN characters, such as whitespace or nulls or
1888          other control characters, on the base embedding level of the
1889          paragraph, and that sequence goes all the way to the end of
1890          the paragraph and follows a character whose resolved
1891          directionality is identical to the base embedding level.
1892          (This is what happens in a buffer with plain L2R text that
1893          happens to include long sequences of control characters.)  By
1894          virtue of N1, the result of examining this long sequence will
1895          always be either STRONG_L or STRONG_R, depending on the base
1896          embedding level.  So we use this fact directly instead of
1897          entering the expensive loop in the "else" clause.  */
1898       else if (current_level == 0
1899                && bidi_it->prev_for_neutral.type == STRONG_L
1900                && !bidi_explicit_dir_char (bidi_it->ch))
1901         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1902                                        STRONG_L, current_level);
1903       else if (/* current level is 1 */
1904                current_level == 1
1905                /* base embedding level is also 1 */
1906                && bidi_it->level_stack[0].level == 1
1907                /* previous character is one of those considered R for
1908                   the purposes of N1 */
1909                && (bidi_it->prev_for_neutral.type == STRONG_R
1910                    || bidi_it->prev_for_neutral.type == WEAK_EN
1911                    || bidi_it->prev_for_neutral.type == WEAK_AN)
1912                && !bidi_explicit_dir_char (bidi_it->ch))
1913         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1914                                        STRONG_R, current_level);
1915       else
1916         {
1917           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1918              the assumption of batch-style processing; see clauses W4,
1919              W5, and especially N1, which require to look far forward
1920              (as well as back) in the buffer/string.  May the fleas of
1921              a thousand camels infest the armpits of those who design
1922              supposedly general-purpose algorithms by looking at their
1923              own implementations, and fail to consider other possible
1924              implementations!  */
1925           struct bidi_it saved_it;
1926           bidi_type_t next_type;
1927
1928           if (bidi_it->scan_dir == -1)
1929             emacs_abort ();
1930
1931           bidi_copy_it (&saved_it, bidi_it);
1932           /* Scan the text forward until we find the first non-neutral
1933              character, and then use that to resolve the neutral we
1934              are dealing with now.  We also cache the scanned iterator
1935              states, to salvage some of the effort later.  */
1936           bidi_cache_iterator_state (bidi_it, 0);
1937           do {
1938             /* Record the info about the previous character, so that
1939                it will be cached below with this state.  */
1940             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1941                 && bidi_it->type != WEAK_BN)
1942               bidi_remember_char (&bidi_it->prev, bidi_it);
1943             type = bidi_resolve_weak (bidi_it);
1944             /* Paragraph separators have their levels fully resolved
1945                at this point, so cache them as resolved.  */
1946             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1947             /* FIXME: implement L1 here, by testing for a newline and
1948                resetting the level for any sequence of whitespace
1949                characters adjacent to it.  */
1950           } while (!(type == NEUTRAL_B
1951                      || (type != WEAK_BN
1952                          && bidi_get_category (type) != NEUTRAL)
1953                      /* This is all per level run, so stop when we
1954                         reach the end of this level run.  */
1955                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1956                          != current_level)));
1957
1958           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1959
1960           switch (type)
1961             {
1962               case STRONG_L:
1963               case STRONG_R:
1964               case STRONG_AL:
1965                 /* Actually, STRONG_AL cannot happen here, because
1966                    bidi_resolve_weak converts it to STRONG_R, per W3.  */
1967                 eassert (type != STRONG_AL);
1968                 next_type = type;
1969                 break;
1970               case WEAK_EN:
1971               case WEAK_AN:
1972                 /* N1: ``European and Arabic numbers are treated as
1973                    though they were R.''  */
1974                 next_type = STRONG_R;
1975                 break;
1976               case WEAK_BN:
1977               case NEUTRAL_ON:  /* W6/Retaining */
1978                 if (!bidi_explicit_dir_char (bidi_it->ch))
1979                   emacs_abort (); /* can't happen: BNs are skipped */
1980                 /* FALLTHROUGH */
1981               case NEUTRAL_B:
1982                 /* Marched all the way to the end of this level run.
1983                    We need to use the eor type, whose information is
1984                    stored by bidi_set_sor_type in the prev_for_neutral
1985                    member.  */
1986                 if (saved_it.type != WEAK_BN
1987                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1988                   next_type = bidi_it->prev_for_neutral.type;
1989                 else
1990                   {
1991                     /* This is a BN which does not adjoin neutrals.
1992                        Leave its type alone.  */
1993                     bidi_copy_it (bidi_it, &saved_it);
1994                     return bidi_it->type;
1995                   }
1996                 break;
1997               default:
1998                 emacs_abort ();
1999             }
2000           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
2001                                          next_type, current_level);
2002           saved_it.next_for_neutral.type = next_type;
2003           saved_it.type = type;
2004           bidi_check_type (next_type);
2005           bidi_check_type (type);
2006           bidi_copy_it (bidi_it, &saved_it);  /* undo the forward scan */
2007         }
2008     }
2009   return type;
2010 }
2011
2012 /* Step BIDI_IT forward (in logical order) by one character of the
2013    buffer/string and return the bidi type that character gets once
2014    its neutral type has been resolved.  Only forward scans are
2015    supported; any other scan direction aborts.  */
2016 static bidi_type_t
2017 bidi_type_of_next_char (struct bidi_it *bidi_it)
2018 {
2019   if (bidi_it->scan_dir != 1)
2020     emacs_abort ();
2021
2022   /* Once we have left the region in which only empty embeddings were
2023      seen, stop ignoring BNs.  The value -2 is dropped as soon as the
2024      current character is not an explicit directional code; a
2025      non-negative limit is dropped when the current position has
2026      reached it.  */
2027   if (bidi_it->ignore_bn_limit == -2)
2028     {
2029       if (!bidi_explicit_dir_char (bidi_it->ch))
2030         bidi_it->ignore_bn_limit = -1;
2031     }
2032   else if (bidi_it->ignore_bn_limit > -1
2033            && bidi_it->ignore_bn_limit <= bidi_it->charpos)
2034     bidi_it->ignore_bn_limit = -1;
2035   return bidi_resolve_neutral (bidi_it);
2036 }
2037
2038 /* Advance BIDI_IT one character position in the buffer/string (in the
2039    current scan direction), resolve that character's embedding and
2040    implicit levels, and return the result.  Fully resolved cached states
2041    are reused; a backward scan must find one, or we abort.  */
2042 static int
2043 bidi_level_of_next_char (struct bidi_it *bidi_it)
2044 {
2045   bidi_type_t type;
2046   int level, prev_level = -1;  /* prev_level is set only when scanning forward */
2047   struct bidi_saved_info next_for_neutral;
2048   ptrdiff_t next_char_pos = -2;  /* -2: no position to look up in the cache */
2049
2050   if (bidi_it->scan_dir == 1)
2051     {
2052       ptrdiff_t eob
2053         = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2054            ? bidi_it->string.schars : ZV);
2055
2056       /* There's no sense in trying to advance if we hit end of text.  */
2057       if (bidi_it->charpos >= eob)
2058         return bidi_it->resolved_level;
2059
2060       /* Record the info about the previous character.  */
2061       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
2062           && bidi_it->type != WEAK_BN)
2063         bidi_remember_char (&bidi_it->prev, bidi_it);
2064       if (bidi_it->type_after_w1 == STRONG_R
2065           || bidi_it->type_after_w1 == STRONG_L
2066           || bidi_it->type_after_w1 == STRONG_AL)
2067         bidi_remember_char (&bidi_it->last_strong, bidi_it);
2068       /* FIXME: it sounds like we don't need both prev and
2069          prev_for_neutral members, but I'm leaving them both for now.  */
2070       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
2071           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
2072         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
2073
2074       /* If we overstepped the characters used for resolving neutrals
2075          and whitespace, invalidate their info in the iterator.  */
2076       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
2077         bidi_it->next_for_neutral.type = UNKNOWN_BT;
2078       if (bidi_it->next_en_pos >= 0
2079           && bidi_it->charpos >= bidi_it->next_en_pos)
2080         {
2081           bidi_it->next_en_pos = 0;
2082           bidi_it->next_en_type = UNKNOWN_BT;
2083         }
2084       if (bidi_it->next_for_ws.type != UNKNOWN_BT
2085           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
2086         bidi_it->next_for_ws.type = UNKNOWN_BT;
2087
2088       /* This must be taken before we fill the iterator with the info
2089          about the next char.  If we scan backwards, the iterator
2090          state must be already cached, so there's no need to know the
2091          embedding level of the previous character, since we will be
2092          returning to our caller shortly.  */
2093       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
2094     }
2095   next_for_neutral = bidi_it->next_for_neutral;
2096
2097   /* Perhaps the character we want is already cached.  If it is, the
2098      call to bidi_cache_find below will return a type other than
2099      UNKNOWN_BT.  */
2100   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
2101     {
2102       int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2103                  ? 0 : 1);
2104       if (bidi_it->scan_dir > 0)
2105         {
2106           if (bidi_it->nchars <= 0)
2107             emacs_abort ();
2108           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2109         }
2110       else if (bidi_it->charpos >= bob)
2111         /* Implementation note: we allow next_char_pos to be as low as
2112            0 for buffers or -1 for strings, and that is okay because
2113            that's the "position" of the sentinel iterator state we
2114            cached at the beginning of the iteration.  */
2115         next_char_pos = bidi_it->charpos - 1;
2116       if (next_char_pos >= bob - 1)
2117         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2118       else
2119         type = UNKNOWN_BT;
2120     }
2121   else
2122     type = UNKNOWN_BT;
2123   if (type != UNKNOWN_BT)
2124     {
2125       /* Don't lose the information for resolving neutrals!  The
2126          cached states could have been cached before their
2127          next_for_neutral member was computed.  If we are on our way
2128          forward, we can simply take the info from the previous
2129          state.  */
2130       if (bidi_it->scan_dir == 1
2131           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2132         bidi_it->next_for_neutral = next_for_neutral;
2133
2134       /* If resolved_level is -1, it means this state was cached
2135          before it was completely resolved, so we cannot return
2136          it.  */
2137       if (bidi_it->resolved_level != -1)
2138         return bidi_it->resolved_level;
2139     }
2140   if (bidi_it->scan_dir == -1)
2141     /* If we are going backwards, the iterator state is already cached
2142        from previous scans, and should be fully resolved.  */
2143     emacs_abort ();
2144
2145   if (type == UNKNOWN_BT)
2146     type = bidi_type_of_next_char (bidi_it);
2147
2148   if (type == NEUTRAL_B)
2149     return bidi_it->resolved_level;
2150
2151   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2152   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2153       || (type == WEAK_BN && prev_level == level))
2154     {
2155       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2156         emacs_abort ();
2157
2158       /* If the cached state shows a neutral character, it was not
2159          resolved by bidi_resolve_neutral, so do it now.  */
2160       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2161                                      bidi_it->next_for_neutral.type,
2162                                      level);
2163     }
2164
2165   if (!(type == STRONG_R
2166         || type == STRONG_L
2167         || type == WEAK_BN
2168         || type == WEAK_EN
2169         || type == WEAK_AN))
2170     emacs_abort ();
2171   bidi_it->type = type;
2172   bidi_check_type (bidi_it->type);
2173
2174   /* For L1 below, we need to know, for each WS character, whether
2175      it belongs to a sequence of WS characters preceding a newline
2176      or a TAB or a paragraph separator.  */
2177   if (bidi_it->orig_type == NEUTRAL_WS
2178       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2179     {
2180       int ch;
2181       ptrdiff_t clen = bidi_it->ch_len;
2182       ptrdiff_t bpos = bidi_it->bytepos;
2183       ptrdiff_t cpos = bidi_it->charpos;
2184       ptrdiff_t disp_pos = bidi_it->disp_pos;
2185       ptrdiff_t nc = bidi_it->nchars;
2186       struct bidi_string_data bs = bidi_it->string;
2187       bidi_type_t chtype;
2188       bool fwp = bidi_it->frame_window_p;
2189       int dpp = bidi_it->disp_prop;
2190
2191       if (bidi_it->nchars <= 0)
2192         emacs_abort ();
2193       do {
2194         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2195                               fwp, &clen, &nc);
2196         if (ch == '\n' || ch == BIDI_EOB)
2197           chtype = NEUTRAL_B;
2198         else
2199           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2200       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2201                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2202       bidi_it->next_for_ws.type = chtype;
2203       bidi_check_type (bidi_it->next_for_ws.type);
2204       bidi_it->next_for_ws.charpos = cpos;
2205       bidi_it->next_for_ws.bytepos = bpos;
2206     }
2207
2208   /* Resolve implicit levels, with a twist: PDFs get the embedding
2209      level of the embedding they terminate.  See below for the
2210      reason.  */
2211   if (bidi_it->orig_type == PDF
2212       /* Don't do this if this formatting code didn't change the
2213          embedding level due to invalid or empty embeddings.  */
2214       && prev_level != level)
2215     {
2216       /* Don't look in UAX#9 for the reason for this: it's our own
2217          private quirk.  The reason is that we want the formatting
2218          codes to be delivered so that they bracket the text of their
2219          embedding.  For example, given the text
2220
2221              {RLO}teST{PDF}
2222
2223          we want it to be displayed as
2224
2225              {PDF}STet{RLO}
2226
2227          not as
2228
2229              STet{RLO}{PDF}
2230
2231          which will result because we bump up the embedding level as
2232          soon as we see the RLO and pop it as soon as we see the PDF,
2233          so RLO itself has the same embedding level as "teST", and
2234          thus would be normally delivered last, just before the PDF.
2235          The assignment below restores the pre-PDF level so that this
2236          ugly side effect does not happen.
2237
2238          (This is, of course, only important if the formatting codes
2239          are actually displayed, but Emacs does need to display them
2240          if the user wants to.)  */
2241       level = prev_level;
2242     }
2243   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2244            || bidi_it->orig_type == NEUTRAL_S
2245            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2246            || (bidi_it->orig_type == NEUTRAL_WS
2247                && (bidi_it->next_for_ws.type == NEUTRAL_B
2248                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
2249     level = bidi_it->level_stack[0].level;
2250   else if ((level & 1) == 0) /* I1 */
2251     {
2252       if (type == STRONG_R)
2253         level++;
2254       else if (type == WEAK_EN || type == WEAK_AN)
2255         level += 2;
2256     }
2257   else                  /* I2 */
2258     {
2259       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2260         level++;
2261     }
2262
2263   bidi_it->resolved_level = level;
2264   return level;
2265 }
2266
2267 /* Take BIDI_IT to the opposite edge of the level LEVEL.  A non-zero
2268    END_FLAG says we are at the end of that level and have to get
2269    ready to resume the scan of the lower level.
2270
2271    A cached opposite edge is simply fetched into the iterator.  With
2272    no such cache entry, the buffer or string is walked, caching each
2273    state on the way, until a character whose level is below LEVEL
2274    has been reached.
2275
2276    Note: this is not about a ``level run'' in the UAX#9 sense of
2277    the term, but about a ``level'' that also covers every level
2278    higher than it.  For instance, with levels laid out
2279    like this:
2280
2281          11111112222222333333334443343222222111111112223322111
2282                 A      B                    C
2283
2284    and a left-to-right scan that stands at point A, this function
2285    goes to point C, whereas the UAX#9 ``level 2 run'' ends at
2286    point B.  */
2287 static void
2288 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
2289 {
2290   int dir = bidi_it->scan_dir;
2291   ptrdiff_t cached;
2292
2293   if (end_flag)
2294     dir = -dir;
2295   cached = bidi_cache_find_level_change (level, dir, end_flag);
2296   if (cached >= bidi_cache_start)
2297     {
2298       bidi_cache_fetch_state (cached, bidi_it);
2299       return;
2300     }
2301
2302   /* The edges of a level we are leaving must already be cached.  */
2303   if (end_flag)
2304     emacs_abort ();
2305
2306   /* Cache every state visited; stop once the level is below LEVEL.  */
2307   do
2308     bidi_cache_iterator_state (bidi_it, 1);
2309   while (bidi_level_of_next_char (bidi_it) >= level);
2310   bidi_cache_iterator_state (bidi_it, 1);
2311 }
2312
     /* Move BIDI_IT to the next character in the visual order.  This is
        the main entry point of this file: the caller processes the
        character described by BIDI_IT and then calls us again for the
        next one.

        BIDI_IT is updated in place and nothing is returned.  We abort if
        BIDI_IT's charpos or bytepos is negative.  A scan_dir of zero is
        replaced by 1.  */
2313 void
2314 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2315 {
2316   int old_level, new_level, next_level;
2317   struct bidi_it sentinel;
2318   struct gcpro gcpro1;
2319
       /* Sanity check: the iterator position must not be negative.  */
2320   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2321     emacs_abort ();
2322
2323   if (bidi_it->scan_dir == 0)
2324     {
2325       bidi_it->scan_dir = 1;    /* default to logical order */
2326     }
2327
2328   /* The code below can call eval, and thus cause GC.  If we are
2329      iterating a Lisp string, make sure it won't be GCed.  */
2330   if (STRINGP (bidi_it->string.lstring))
2331     GCPRO1 (bidi_it->string.lstring);
2332
2333   /* If we just passed a newline (or BIDI_EOB), and this is not the
          first element, initialize for the next line.  */
2334   if (!bidi_it->first_elt
2335       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2336     bidi_line_init (bidi_it);
2337
2338   /* Prepare the sentinel iterator state, and cache it.  When we bump
2339      into it, scanning backwards, we'll know that the last non-base
2340      level is exhausted.  This is done only when bidi_cache_idx equals
          bidi_cache_start (presumably meaning nothing is cached yet for
          this iteration -- confirm against the cache code).  */
2341   if (bidi_cache_idx == bidi_cache_start)
2342     {
2343       bidi_copy_it (&sentinel, bidi_it);
           /* On the first element, make the sentinel describe a
              one-character newline at the position just before the
              iterator's.  */
2344       if (bidi_it->first_elt)
2345         {
2346           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2347           sentinel.bytepos--;
2348           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2349           sentinel.ch_len = 1;
2350           sentinel.nchars = 1;
2351         }
2352       bidi_cache_iterator_state (&sentinel, 1);
2353     }
2354
       /* Remember the level we are leaving, then find the level of the
          next character in the current scan direction.  */
2355   old_level = bidi_it->resolved_level;
2356   new_level = bidi_level_of_next_char (bidi_it);
2357
2358   /* Reordering of resolved levels (clause L2) is implemented by
2359      jumping to the other edge of the level and flipping direction of
2360      scanning the text whenever we find a level change.  */
2361   if (new_level != old_level)
2362     {
           /* When ascending, we look for the far edge of OLD_LEVEL + 1
              and are not at the end of a level; when descending, we look
              for the far edge of OLD_LEVEL itself and are at its end.
              INCR is the step by which the level is expected to change
              each time we flip direction.  */
2363       bool ascending = new_level > old_level;
2364       int level_to_search = ascending ? old_level + 1 : old_level;
2365       int incr = ascending ? 1 : -1;
2366       int expected_next_level = old_level + incr;
2367
2368       /* Jump (or walk) to the other edge of this level.  */
2369       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2370       /* Switch scan direction and peek at the next character in the
2371          new direction.  */
2372       bidi_it->scan_dir = -bidi_it->scan_dir;
2373
2374       /* The following loop handles the case where the resolved level
2375          jumps by more than one.  This is typical for numbers inside a
2376          run of text with left-to-right embedding direction, but can
2377          also happen in other situations.  In those cases the decision
2378          where to continue after a level change, and in what direction,
2379          is tricky.  For example, given a text like below:
2380
2381                   abcdefgh
2382                   11336622
2383
2384          (where the numbers below the text show the resolved levels),
2385          the result of reordering according to UAX#9 should be this:
2386
2387                   efdcghba
2388
2389          This is implemented by the loop below which flips direction
2390          and jumps to the other edge of the level each time it finds
2391          the new level not to be the expected one.  The expected level
2392          is always one more or one less than the previous one.  */
2393       next_level = bidi_peek_at_next_level (bidi_it);
2394       while (next_level != expected_next_level)
2395         {
2396           /* If next_level is -1, it means we have an unresolved level
2397              in the cache, which at this point should not happen.  If
2398              it does, we will infloop.  */
2399           eassert (next_level >= 0);
2400           expected_next_level += incr;
2401           level_to_search += incr;
2402           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2403           bidi_it->scan_dir = -bidi_it->scan_dir;
2404           next_level = bidi_peek_at_next_level (bidi_it);
2405         }
2406
2407       /* Finally, deliver the next character in the new direction.
              The level returned here is not examined any further in this
              function.  */
2408       next_level = bidi_level_of_next_char (bidi_it);
2409     }
2410
2411   /* Take note when we have just processed the newline that precedes
2412      the end of the paragraph.  The next time we are about to be
2413      called, set_iterator_to_next will automatically reinit the
2414      paragraph direction, if needed.  We do this at the newline before
2415      the paragraph separator, because the next character might not be
2416      the first character of the next paragraph, due to the bidi
2417      reordering, whereas we _must_ know the paragraph base direction
2418      _before_ we process the paragraph's text, since the base
2419      direction affects the reordering.  */
2420   if (bidi_it->scan_dir == 1
2421       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2422     {
2423       /* The paragraph direction of the entire string, once
2424          determined, is in effect for the entire string.  Setting the
2425          separator limit to the end of the string prevents
2426          bidi_paragraph_init from being called automatically on this
2427          string.  */
2428       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2429         bidi_it->separator_limit = bidi_it->string.schars;
2430       else if (bidi_it->bytepos < ZV_BYTE)
2431         {
               /* Check the position right after the current character;
                  a non-negative result means a paragraph separator
                  starts there.  */
2432           ptrdiff_t sep_len
2433             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2434                                      bidi_it->bytepos + bidi_it->ch_len);
               /* NCHARS was added to CHARPOS above to get the next
                  position, so it must be positive.  */
2435           if (bidi_it->nchars <= 0)
2436             emacs_abort ();
2437           if (sep_len >= 0)
2438             {
2439               bidi_it->new_paragraph = 1;
2440               /* Record the buffer position of the last character of the
2441                  paragraph separator.  */
2442               bidi_it->separator_limit
2443                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2444             }
2445         }
2446     }
2447
2448   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2449     {
2450       /* If we are at paragraph's base embedding level and beyond the
2451          last cached position, the cache's job is done and we can
2452          discard it.  */
2453       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2454           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2455                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2456         bidi_cache_reset ();
2457       /* But as long as we are caching during forward scan, we must
2458          cache each state, or else the cache integrity will be
2459          compromised: it assumes cached states correspond to buffer
2460          positions 1:1.  */
2461       else
2462         bidi_cache_iterator_state (bidi_it, 1);
2463     }
2464
       /* Undo the GCPRO1 done above; the condition must stay the same as
          the one guarding it.  */
2465   if (STRINGP (bidi_it->string.lstring))
2466     UNGCPRO;
2467 }
2468
2469 /* This is meant to be called from within the debugger, whenever you
2470    wish to examine the cache contents.  It writes to stderr a count
        of the cached states followed by three rows, labeled "ch", "lvl"
        and "pos", which show the character, the resolved level and the
        character position of each cached state, from index 0 up to
        bidi_cache_idx - 1.  If bidi_cache_idx is zero, it only reports
        that the cache is empty.  */
2471 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2472 void
2473 bidi_dump_cached_states (void)
2474 {
2475   ptrdiff_t i;
2476   int ndigits = 1;
2477
2478   if (bidi_cache_idx == 0)
2479     {
2480       fprintf (stderr, "The cache is empty.\n");
2481       return;
2482     }
2483   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2484            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2485
       /* Compute the width of each column: one more than the number of
          decimal digits in the charpos of the last cached state.  This
          leaves room for a separating blank, assuming no cached charpos
          has more digits than the last one.  */
2486   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2487     ndigits++;
       /* Row of characters; each one is printed with %c.  */
2488   fputs ("ch  ", stderr);
2489   for (i = 0; i < bidi_cache_idx; i++)
2490     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2491   fputs ("\n", stderr);
       /* Row of resolved levels.  */
2492   fputs ("lvl ", stderr);
2493   for (i = 0; i < bidi_cache_idx; i++)
2494     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2495   fputs ("\n", stderr);
       /* Row of character positions.  */
2496   fputs ("pos ", stderr);
2497   for (i = 0; i < bidi_cache_idx; i++)
2498     fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
2499   fputs ("\n", stderr);
2500 }