src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2012
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional Algorithm
  23    (UBA), as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
  54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  74 /* Data type for describing the bidirectional character categories.  */
  75 typedef enum {
  76   UNKNOWN_BC,
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 /* UAX#9 says to search only for L, AL, or R types of characters, and
  83    ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
  84    level.  Yudit indeed ignores them.  This variable is therefore set
  85    by default to ignore them, but setting it to zero will take them
  86    into account.  */
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 \f
  94 /***********************************************************************
  95                         Utilities
  96  ***********************************************************************/
  97
  98 /* Return the bidi type of a character CH, subject to the current
  99    directional OVERRIDE.  */
 100 static inline bidi_type_t
 101 bidi_get_type (int ch, bidi_dir_t override)
 102 {
 103   bidi_type_t default_type;
 104
 105   if (ch == BIDI_EOB)
 106     return NEUTRAL_B;
 107   if (ch < 0 || ch > MAX_CHAR)
 108     abort ();
 109
 110   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 111   /* Every valid character code, even those that are unassigned by the
 112      UCD, have some bidi-class property, according to
 113      DerivedBidiClass.txt file.  Therefore, if we ever get UNKNOWN_BT
 114      (= zero) code from CHAR_TABLE_REF, that's a bug.  */
 115   if (default_type == UNKNOWN_BT)
 116     abort ();
 117
 118   if (override == NEUTRAL_DIR)
 119     return default_type;
 120
 121   switch (default_type)
 122     {
 123       /* Although UAX#9 does not tell, it doesn't make sense to
 124          override NEUTRAL_B and LRM/RLM characters.  */
 125       case NEUTRAL_B:
 126       case LRE:
 127       case LRO:
 128       case RLE:
 129       case RLO:
 130       case PDF:
 131         return default_type;
 132       default:
 133         switch (ch)
 134           {
 135             case LRM_CHAR:
 136             case RLM_CHAR:
 137               return default_type;
 138             default:
 139               if (override == L2R) /* X6 */
 140                 return STRONG_L;
 141               else if (override == R2L)
 142                 return STRONG_R;
 143               else
 144                 abort ();       /* can't happen: handled above */
 145           }
 146     }
 147 }
 148
 149 static inline void
 150 bidi_check_type (bidi_type_t type)
 151 {
 152   xassert (UNKNOWN_BT <= type && type <= NEUTRAL_ON);
 153 }
 154
 155 /* Given a bidi TYPE of a character, return its category.  */
 156 static inline bidi_category_t
 157 bidi_get_category (bidi_type_t type)
 158 {
 159   switch (type)
 160     {
 161       case UNKNOWN_BT:
 162         return UNKNOWN_BC;
 163       case STRONG_L:
 164       case STRONG_R:
 165       case STRONG_AL:
 166       case LRE:
 167       case LRO:
 168       case RLE:
 169       case RLO:
 170         return STRONG;
 171       case PDF:         /* ??? really?? */
 172       case WEAK_EN:
 173       case WEAK_ES:
 174       case WEAK_ET:
 175       case WEAK_AN:
 176       case WEAK_CS:
 177       case WEAK_NSM:
 178       case WEAK_BN:
 179         return WEAK;
 180       case NEUTRAL_B:
 181       case NEUTRAL_S:
 182       case NEUTRAL_WS:
 183       case NEUTRAL_ON:
 184         return NEUTRAL;
 185       default:
 186         abort ();
 187     }
 188 }
 189
 190 /* Return the mirrored character of C, if it has one.  If C has no
 191    mirrored counterpart, return C.
 192    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 193    context must be tested by the caller.  */
 194 int
 195 bidi_mirror_char (int c)
 196 {
 197   Lisp_Object val;
 198
 199   if (c == BIDI_EOB)
 200     return c;
 201   if (c < 0 || c > MAX_CHAR)
 202     abort ();
 203
 204   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 205   if (INTEGERP (val))
 206     {
 207       int v;
 208
 209       /* When debugging, check before assigning to V, so that the check
 210          isn't broken by undefined behavior due to int overflow.  */
 211       eassert (CHAR_VALID_P (XINT (val)));
 212
 213       v = XINT (val);
 214
 215       /* Minimal test we must do in optimized builds, to prevent weird
 216          crashes further down the road.  */
 217       if (v < 0 || v > MAX_CHAR)
 218         abort ();
 219
 220       return v;
 221     }
 222
 223   return c;
 224 }
 225
 226 /* Determine the start-of-run (sor) directional type given the two
 227    embedding levels on either side of the run boundary.  Also, update
 228    the saved info about previously seen characters, since that info is
 229    generally valid for a single level run.  */
 230 static inline void
 231 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 232 {
 233   int higher_level = (level_before > level_after ? level_before : level_after);
 234
 235   /* The prev_was_pdf gork is required for when we have several PDFs
 236      in a row.  In that case, we want to compute the sor type for the
 237      next level run only once: when we see the first PDF.  That's
 238      because the sor type depends only on the higher of the two levels
 239      that we find on the two sides of the level boundary (see UAX#9,
 240      clause X10), and so we don't need to know the final embedding
 241      level to which we descend after processing all the PDFs.  */
 242   if (!bidi_it->prev_was_pdf || level_before < level_after)
 243     /* FIXME: should the default sor direction be user selectable?  */
 244     bidi_it->sor = ((higher_level & 1) != 0 ? R2L : L2R);
 245   if (level_before > level_after)
 246     bidi_it->prev_was_pdf = 1;
 247
 248   bidi_it->prev.type = UNKNOWN_BT;
 249   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 250     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 251   bidi_it->prev_for_neutral.type = (bidi_it->sor == R2L ? STRONG_R : STRONG_L);
 252   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 253   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 254   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1
 255     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 256   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 257 }
 258
 259 /* Push the current embedding level and override status; reset the
 260    current level to LEVEL and the current override status to OVERRIDE.  */
 261 static inline void
 262 bidi_push_embedding_level (struct bidi_it *bidi_it,
 263                            int level, bidi_dir_t override)
 264 {
 265   bidi_it->stack_idx++;
 266   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 267   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 268   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 269 }
 270
 271 /* Pop the embedding level and directional override status from the
 272    stack, and return the new level.  */
 273 static inline int
 274 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 275 {
 276   /* UAX#9 says to ignore invalid PDFs.  */
 277   if (bidi_it->stack_idx > 0)
 278     bidi_it->stack_idx--;
 279   return bidi_it->level_stack[bidi_it->stack_idx].level;
 280 }
 281
 282 /* Record in SAVED_INFO the information about the current character.  */
 283 static inline void
 284 bidi_remember_char (struct bidi_saved_info *saved_info,
 285                     struct bidi_it *bidi_it)
 286 {
 287   saved_info->charpos = bidi_it->charpos;
 288   saved_info->bytepos = bidi_it->bytepos;
 289   saved_info->type = bidi_it->type;
 290   bidi_check_type (bidi_it->type);
 291   saved_info->type_after_w1 = bidi_it->type_after_w1;
 292   bidi_check_type (bidi_it->type_after_w1);
 293   saved_info->orig_type = bidi_it->orig_type;
 294   bidi_check_type (bidi_it->orig_type);
 295 }
 296
 297 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 298    copies the part of the level stack that is actually in use.  */
 299 static inline void
 300 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 301 {
 302   int i;
 303
 304   /* Copy everything except the level stack and beyond.  */
 305   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 306
 307   /* Copy the active part of the level stack.  */
 308   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 309   for (i = 1; i <= from->stack_idx; i++)
 310     to->level_stack[i] = from->level_stack[i];
 311 }
 312
 313 \f
 314 /***********************************************************************
 315                         Caching the bidi iterator states
 316  ***********************************************************************/
 317
 318 #define BIDI_CACHE_CHUNK 200
 319 static struct bidi_it *bidi_cache;
 320 static ptrdiff_t bidi_cache_size = 0;
 321 enum { elsz = sizeof (struct bidi_it) };
 322 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 323 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 324 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 325                                            "stack" level */
 326
 327 /* 5-slot stack for saving the start of the previous level of the
 328    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 329    and we need the same size of our stack.  */
 330 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 331 static int bidi_cache_sp;
 332
 333 /* Size of header used by bidi_shelve_cache.  */
 334 enum
 335   {
 336     bidi_shelve_header_size
 337       = (sizeof (bidi_cache_idx) + sizeof (bidi_cache_start_stack)
 338          + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 339          + sizeof (bidi_cache_last_idx))
 340   };
 341
 342 /* Reset the cache state to the empty state.  We only reset the part
 343    of the cache relevant to iteration of the current object.  Previous
 344    objects, which are pushed on the display iterator's stack, are left
 345    intact.  This is called when the cached information is no more
 346    useful for the current iteration, e.g. when we were reseated to a
 347    new position on the same object.  */
 348 static inline void
 349 bidi_cache_reset (void)
 350 {
 351   bidi_cache_idx = bidi_cache_start;
 352   bidi_cache_last_idx = -1;
 353 }
 354
 355 /* Shrink the cache to its minimal size.  Called when we init the bidi
 356    iterator for reordering a buffer or a string that does not come
 357    from display properties, because that means all the previously
 358    cached info is of no further use.  */
 359 static inline void
 360 bidi_cache_shrink (void)
 361 {
 362   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 363     {
 364       bidi_cache
 365         = (struct bidi_it *) xrealloc (bidi_cache, BIDI_CACHE_CHUNK * elsz);
 366       bidi_cache_size = BIDI_CACHE_CHUNK;
 367     }
 368   bidi_cache_reset ();
 369 }
 370
 371 static inline void
 372 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 373 {
 374   int current_scan_dir = bidi_it->scan_dir;
 375
 376   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 377     abort ();
 378
 379   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 380   bidi_it->scan_dir = current_scan_dir;
 381   bidi_cache_last_idx = idx;
 382 }
 383
 384 /* Find a cached state with a given CHARPOS and resolved embedding
 385    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 386    resolved levels in cached states.  DIR, if non-zero, means search
 387    in that direction from the last cache hit.  */
 388 static inline ptrdiff_t
 389 bidi_cache_search (ptrdiff_t charpos, int level, int dir)
 390 {
 391   ptrdiff_t i, i_start;
 392
 393   if (bidi_cache_idx > bidi_cache_start)
 394     {
 395       if (bidi_cache_last_idx == -1)
 396         bidi_cache_last_idx = bidi_cache_idx - 1;
 397       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 398         {
 399           dir = -1;
 400           i_start = bidi_cache_last_idx - 1;
 401         }
 402       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 403                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 404         {
 405           dir = 1;
 406           i_start = bidi_cache_last_idx + 1;
 407         }
 408       else if (dir)
 409         i_start = bidi_cache_last_idx;
 410       else
 411         {
 412           dir = -1;
 413           i_start = bidi_cache_idx - 1;
 414         }
 415
 416       if (dir < 0)
 417         {
 418           /* Linear search for now; FIXME!  */
 419           for (i = i_start; i >= bidi_cache_start; i--)
 420             if (bidi_cache[i].charpos <= charpos
 421                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 422                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 423               return i;
 424         }
 425       else
 426         {
 427           for (i = i_start; i < bidi_cache_idx; i++)
 428             if (bidi_cache[i].charpos <= charpos
 429                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 430                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 431               return i;
 432         }
 433     }
 434
 435   return -1;
 436 }
 437
 438 /* Find a cached state where the resolved level changes to a value
 439    that is lower than LEVEL, and return its cache slot index.  DIR is
 440    the direction to search, starting with the last used cache slot.
 441    If DIR is zero, we search backwards from the last occupied cache
 442    slot.  BEFORE, if non-zero, means return the index of the slot that
 443    is ``before'' the level change in the search direction.  That is,
 444    given the cached levels like this:
 445
 446          1122333442211
 447           AB        C
 448
 449    and assuming we are at the position cached at the slot marked with
 450    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 451    index of slot B or A, depending whether BEFORE is, respectively,
 452    non-zero or zero.  */
 453 static ptrdiff_t
 454 bidi_cache_find_level_change (int level, int dir, int before)
 455 {
 456   if (bidi_cache_idx)
 457     {
 458       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 459       int incr = before ? 1 : 0;
 460
 461       xassert (!dir || bidi_cache_last_idx >= 0);
 462
 463       if (!dir)
 464         dir = -1;
 465       else if (!incr)
 466         i += dir;
 467
 468       if (dir < 0)
 469         {
 470           while (i >= bidi_cache_start + incr)
 471             {
 472               if (bidi_cache[i - incr].resolved_level >= 0
 473                   && bidi_cache[i - incr].resolved_level < level)
 474                 return i;
 475               i--;
 476             }
 477         }
 478       else
 479         {
 480           while (i < bidi_cache_idx - incr)
 481             {
 482               if (bidi_cache[i + incr].resolved_level >= 0
 483                   && bidi_cache[i + incr].resolved_level < level)
 484                 return i;
 485               i++;
 486             }
 487         }
 488     }
 489
 490   return -1;
 491 }
 492
 493 static inline void
 494 bidi_cache_ensure_space (ptrdiff_t idx)
 495 {
 496   /* Enlarge the cache as needed.  */
 497   if (idx >= bidi_cache_size)
 498     {
 499       /* The bidi cache cannot be larger than the largest Lisp string
 500          or buffer.  */
 501       ptrdiff_t string_or_buffer_bound
 502         = max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 503
 504       /* Also, it cannot be larger than what C can represent.  */
 505       ptrdiff_t c_bound
 506         = (min (PTRDIFF_MAX, SIZE_MAX) - bidi_shelve_header_size) / elsz;
 507
 508       bidi_cache
 509         = xpalloc (bidi_cache, &bidi_cache_size,
 510                    max (BIDI_CACHE_CHUNK, idx - bidi_cache_size + 1),
 511                    min (string_or_buffer_bound, c_bound), elsz);
 512     }
 513 }
 514
 515 static inline void
 516 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 517 {
 518   ptrdiff_t idx;
 519
 520   /* We should never cache on backward scans.  */
 521   if (bidi_it->scan_dir == -1)
 522     abort ();
 523   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 524
 525   if (idx < 0)
 526     {
 527       idx = bidi_cache_idx;
 528       bidi_cache_ensure_space (idx);
 529       /* Character positions should correspond to cache positions 1:1.
 530          If we are outside the range of cached positions, the cache is
 531          useless and must be reset.  */
 532       if (idx > bidi_cache_start &&
 533           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 534                                + bidi_cache[idx - 1].nchars)
 535            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 536         {
 537           bidi_cache_reset ();
 538           idx = bidi_cache_start;
 539         }
 540       if (bidi_it->nchars <= 0)
 541         abort ();
 542       bidi_copy_it (&bidi_cache[idx], bidi_it);
 543       if (!resolved)
 544         bidi_cache[idx].resolved_level = -1;
 545     }
 546   else
 547     {
 548       /* Copy only the members which could have changed, to avoid
 549          costly copying of the entire struct.  */
 550       bidi_cache[idx].type = bidi_it->type;
 551       bidi_check_type (bidi_it->type);
 552       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 553       bidi_check_type (bidi_it->type_after_w1);
 554       if (resolved)
 555         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 556       else
 557         bidi_cache[idx].resolved_level = -1;
 558       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 559       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 560       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 561       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 562       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 563       bidi_cache[idx].disp_pos = bidi_it->disp_pos;
 564       bidi_cache[idx].disp_prop = bidi_it->disp_prop;
 565     }
 566
 567   bidi_cache_last_idx = idx;
 568   if (idx >= bidi_cache_idx)
 569     bidi_cache_idx = idx + 1;
 570 }
 571
 572 static inline bidi_type_t
 573 bidi_cache_find (ptrdiff_t charpos, int level, struct bidi_it *bidi_it)
 574 {
 575   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 576
 577   if (i >= bidi_cache_start)
 578     {
 579       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 580
 581       bidi_copy_it (bidi_it, &bidi_cache[i]);
 582       bidi_cache_last_idx = i;
 583       /* Don't let scan direction from the cached state override
 584          the current scan direction.  */
 585       bidi_it->scan_dir = current_scan_dir;
 586       return bidi_it->type;
 587     }
 588
 589   return UNKNOWN_BT;
 590 }
 591
 592 static inline int
 593 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 594 {
 595   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 596     abort ();
 597   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 598 }
 599
 600 \f
 601 /***********************************************************************
 602              Pushing and popping the bidi iterator state
 603  ***********************************************************************/
 604
 605 /* Push the bidi iterator state in preparation for reordering a
 606    different object, e.g. display string found at certain buffer
 607    position.  Pushing the bidi iterator boils down to saving its
 608    entire state on the cache and starting a new cache "stacked" on top
 609    of the current cache.  */
 610 void
 611 bidi_push_it (struct bidi_it *bidi_it)
 612 {
 613   /* Save the current iterator state in its entirety after the last
 614      used cache slot.  */
 615   bidi_cache_ensure_space (bidi_cache_idx);
 616   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 617
 618   /* Push the current cache start onto the stack.  */
 619   xassert (bidi_cache_sp < IT_STACK_SIZE);
 620   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 621
 622   /* Start a new level of cache, and make it empty.  */
 623   bidi_cache_start = bidi_cache_idx;
 624   bidi_cache_last_idx = -1;
 625 }
 626
 627 /* Restore the iterator state saved by bidi_push_it and return the
 628    cache to the corresponding state.  */
 629 void
 630 bidi_pop_it (struct bidi_it *bidi_it)
 631 {
 632   if (bidi_cache_start <= 0)
 633     abort ();
 634
 635   /* Reset the next free cache slot index to what it was before the
 636      call to bidi_push_it.  */
 637   bidi_cache_idx = bidi_cache_start - 1;
 638
 639   /* Restore the bidi iterator state saved in the cache.  */
 640   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 641
 642   /* Pop the previous cache start from the stack.  */
 643   if (bidi_cache_sp <= 0)
 644     abort ();
 645   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 646
 647   /* Invalidate the last-used cache slot data.  */
 648   bidi_cache_last_idx = -1;
 649 }
 650
 651 static ptrdiff_t bidi_cache_total_alloc;
 652
 653 /* Stash away a copy of the cache and its control variables.  */
 654 void *
 655 bidi_shelve_cache (void)
 656 {
 657   unsigned char *databuf;
 658   ptrdiff_t alloc;
 659
 660   /* Empty cache.  */
 661   if (bidi_cache_idx == 0)
 662     return NULL;
 663
 664   alloc = (bidi_shelve_header_size
 665            + bidi_cache_idx * sizeof (struct bidi_it));
 666   databuf = xmalloc (alloc);
 667   bidi_cache_total_alloc += alloc;
 668
 669   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 670   memcpy (databuf + sizeof (bidi_cache_idx),
 671           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 672   memcpy (databuf + sizeof (bidi_cache_idx)
 673           + bidi_cache_idx * sizeof (struct bidi_it),
 674           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 675   memcpy (databuf + sizeof (bidi_cache_idx)
 676           + bidi_cache_idx * sizeof (struct bidi_it)
 677           + sizeof (bidi_cache_start_stack),
 678           &bidi_cache_sp, sizeof (bidi_cache_sp));
 679   memcpy (databuf + sizeof (bidi_cache_idx)
 680           + bidi_cache_idx * sizeof (struct bidi_it)
 681           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 682           &bidi_cache_start, sizeof (bidi_cache_start));
 683   memcpy (databuf + sizeof (bidi_cache_idx)
 684           + bidi_cache_idx * sizeof (struct bidi_it)
 685           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 686           + sizeof (bidi_cache_start),
 687           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 688
 689   return databuf;
 690 }
 691
 692 /* Restore the cache state from a copy stashed away by
 693    bidi_shelve_cache, and free the buffer used to stash that copy.
 694    JUST_FREE non-zero means free the buffer, but don't restore the
 695    cache; used when the corresponding iterator is discarded instead of
 696    being restored.  */
 697 void
 698 bidi_unshelve_cache (void *databuf, int just_free)
 699 {
 700   unsigned char *p = databuf;
 701
 702   if (!p)
 703     {
 704       if (!just_free)
 705         {
 706           /* A NULL pointer means an empty cache.  */
 707           bidi_cache_start = 0;
 708           bidi_cache_sp = 0;
 709           bidi_cache_reset ();
 710         }
 711     }
 712   else
 713     {
 714       if (just_free)
 715         {
 716           ptrdiff_t idx;
 717
 718           memcpy (&idx, p, sizeof (bidi_cache_idx));
 719           bidi_cache_total_alloc
 720             -= bidi_shelve_header_size + idx * sizeof (struct bidi_it);
 721         }
 722       else
 723         {
 724           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 725           bidi_cache_ensure_space (bidi_cache_idx);
 726           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 727                   bidi_cache_idx * sizeof (struct bidi_it));
 728           memcpy (bidi_cache_start_stack,
 729                   p + sizeof (bidi_cache_idx)
 730                   + bidi_cache_idx * sizeof (struct bidi_it),
 731                   sizeof (bidi_cache_start_stack));
 732           memcpy (&bidi_cache_sp,
 733                   p + sizeof (bidi_cache_idx)
 734                   + bidi_cache_idx * sizeof (struct bidi_it)
 735                   + sizeof (bidi_cache_start_stack),
 736                   sizeof (bidi_cache_sp));
 737           memcpy (&bidi_cache_start,
 738                   p + sizeof (bidi_cache_idx)
 739                   + bidi_cache_idx * sizeof (struct bidi_it)
 740                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 741                   sizeof (bidi_cache_start));
 742           memcpy (&bidi_cache_last_idx,
 743                   p + sizeof (bidi_cache_idx)
 744                   + bidi_cache_idx * sizeof (struct bidi_it)
 745                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 746                   + sizeof (bidi_cache_start),
 747                   sizeof (bidi_cache_last_idx));
 748           bidi_cache_total_alloc
 749             -= (bidi_shelve_header_size
 750                 + bidi_cache_idx * sizeof (struct bidi_it));
 751         }
 752
 753       xfree (p);
 754     }
 755 }
 756
 757 \f
 758 /***********************************************************************
 759                         Initialization
 760  ***********************************************************************/
 761 static void
 762 bidi_initialize (void)
 763 {
 764   bidi_type_table = uniprop_table (intern ("bidi-class"));
 765   if (NILP (bidi_type_table))
 766     abort ();
 767   staticpro (&bidi_type_table);
 768
 769   bidi_mirror_table = uniprop_table (intern ("mirroring"));
 770   if (NILP (bidi_mirror_table))
 771     abort ();
 772   staticpro (&bidi_mirror_table);
 773
 774   Qparagraph_start = intern ("paragraph-start");
 775   staticpro (&Qparagraph_start);
 776   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 777   if (!STRINGP (paragraph_start_re))
 778     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 779   staticpro (&paragraph_start_re);
 780   Qparagraph_separate = intern ("paragraph-separate");
 781   staticpro (&Qparagraph_separate);
 782   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 783   if (!STRINGP (paragraph_separate_re))
 784     paragraph_separate_re = build_string ("[ \t\f]*$");
 785   staticpro (&paragraph_separate_re);
 786
 787   bidi_cache_sp = 0;
 788   bidi_cache_total_alloc = 0;
 789
 790   bidi_initialized = 1;
 791 }
 792
 793 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 794    end.  */
 795 static inline void
 796 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 797 {
 798   bidi_it->invalid_levels = 0;
 799   bidi_it->invalid_rl_levels = -1;
 800   bidi_it->stack_idx = 0;
 801   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 802 }
 803
 804 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 805 void
 806 bidi_init_it (ptrdiff_t charpos, ptrdiff_t bytepos, int frame_window_p,
 807               struct bidi_it *bidi_it)
 808 {
 809   if (! bidi_initialized)
 810     bidi_initialize ();
 811   if (charpos >= 0)
 812     bidi_it->charpos = charpos;
 813   if (bytepos >= 0)
 814     bidi_it->bytepos = bytepos;
 815   bidi_it->frame_window_p = frame_window_p;
 816   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 817   bidi_it->first_elt = 1;
 818   bidi_set_paragraph_end (bidi_it);
 819   bidi_it->new_paragraph = 1;
 820   bidi_it->separator_limit = -1;
 821   bidi_it->type = NEUTRAL_B;
 822   bidi_it->type_after_w1 = NEUTRAL_B;
 823   bidi_it->orig_type = NEUTRAL_B;
 824   bidi_it->prev_was_pdf = 0;
 825   bidi_it->prev.type = bidi_it->prev.type_after_w1
 826     = bidi_it->prev.orig_type = UNKNOWN_BT;
 827   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1
 828     = bidi_it->last_strong.orig_type = UNKNOWN_BT;
 829   bidi_it->next_for_neutral.charpos = -1;
 830   bidi_it->next_for_neutral.type
 831     = bidi_it->next_for_neutral.type_after_w1
 832     = bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 833   bidi_it->prev_for_neutral.charpos = -1;
 834   bidi_it->prev_for_neutral.type
 835     = bidi_it->prev_for_neutral.type_after_w1
 836     = bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 837   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 838   bidi_it->disp_pos = -1;       /* invalid/unknown */
 839   bidi_it->disp_prop = 0;
 840   /* We can only shrink the cache if we are at the bottom level of its
 841      "stack".  */
 842   if (bidi_cache_start == 0)
 843     bidi_cache_shrink ();
 844   else
 845     bidi_cache_reset ();
 846 }
 847
 848 /* Perform initializations for reordering a new line of bidi text.  */
 849 static void
 850 bidi_line_init (struct bidi_it *bidi_it)
 851 {
 852   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 853   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 854   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 855   bidi_it->invalid_levels = 0;
 856   bidi_it->invalid_rl_levels = -1;
 857   /* Setting this to zero will force its recomputation the first time
 858      we need it for W5.  */
 859   bidi_it->next_en_pos = 0;
 860   bidi_it->next_en_type = UNKNOWN_BT;
 861   bidi_it->next_for_ws.type = UNKNOWN_BT;
 862   bidi_set_sor_type (bidi_it,
 863                      (bidi_it->paragraph_dir == R2L ? 1 : 0),
 864                      bidi_it->level_stack[0].level); /* X10 */
 865
 866   bidi_cache_reset ();
 867 }
 868
 869 \f
 870 /***********************************************************************
 871                         Fetching characters
 872  ***********************************************************************/
 873
 874 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 875    are zero-based character positions in S, BEGBYTE is byte position
 876    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 877    string.  */
 878 static inline ptrdiff_t
 879 bidi_count_bytes (const unsigned char *s, const ptrdiff_t beg,
 880                   const ptrdiff_t begbyte, const ptrdiff_t end, int unibyte)
 881 {
 882   ptrdiff_t pos = beg;
 883   const unsigned char *p = s + begbyte, *start = p;
 884
 885   if (unibyte)
 886     p = s + end;
 887   else
 888     {
 889       if (!CHAR_HEAD_P (*p))
 890         abort ();
 891
 892       while (pos < end)
 893         {
 894           p += BYTES_BY_CHAR_HEAD (*p);
 895           pos++;
 896         }
 897     }
 898
 899   return p - start;
 900 }
 901
 902 /* Fetch and returns the character at byte position BYTEPOS.  If S is
 903    non-NULL, fetch the character from string S; otherwise fetch the
 904    character from the current buffer.  UNIBYTE non-zero means S is a
 905    unibyte string.  */
 906 static inline int
 907 bidi_char_at_pos (ptrdiff_t bytepos, const unsigned char *s, int unibyte)
 908 {
 909   if (s)
 910     {
 911       if (unibyte)
 912         return s[bytepos];
 913       else
 914         return STRING_CHAR (s + bytepos);
 915     }
 916   else
 917     return FETCH_MULTIBYTE_CHAR (bytepos);
 918 }
 919
/* Fetch and return the character at BYTEPOS/CHARPOS.  If that
   character is covered by a display string, treat the entire run of
   covered characters as a single character, either u+2029 or u+FFFC,
   and return their combined length in CH_LEN and NCHARS.  DISP_POS
   specifies the character position of the next display string, or -1
   if not yet computed.  When the next character is at or beyond that
   position, the function updates DISP_POS with the position of the
   next display string.  DISP_PROP non-zero means that there's really
   a display string at DISP_POS, as opposed to when we searched till
   DISP_POS without finding one.  If DISP_PROP is 2, it means the
   display spec is of the form `(space ...)', which is replaced with
   u+2029 to handle it as a paragraph separator.  STRING->s is the C
   string to iterate, or NULL if iterating over a buffer or a Lisp
   string; in the latter case, STRING->lstring is the Lisp string.

   On return, *CH_LEN is the length in bytes and *NCHARS the length
   in characters of what was fetched.  If CHARPOS is at or beyond the
   end of the text (STRING->schars for strings, ZV for buffers), the
   value is BIDI_EOB, *CH_LEN and *NCHARS are 1, *DISP_POS is set to
   the end position and *DISP_PROP to zero.

   Aborts if CHARPOS is strictly inside a run covered by a display
   string, or if such a run turns out to be empty.  */
static inline int
bidi_fetch_char (ptrdiff_t bytepos, ptrdiff_t charpos, ptrdiff_t *disp_pos,
                 int *disp_prop, struct bidi_string_data *string,
                 int frame_window_p, ptrdiff_t *ch_len, ptrdiff_t *nchars)
{
  int ch;
  /* End of the text being iterated: string length for C and Lisp
     strings, end of the accessible portion for buffers.  */
  ptrdiff_t endpos
    = (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
  struct text_pos pos;
  int len;

  /* If we got past the last known position of display string, compute
     the position of the next one.  That position could be at CHARPOS.  */
  if (charpos < endpos && charpos > *disp_pos)
    {
      SET_TEXT_POS (pos, charpos, bytepos);
      *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
                                              disp_prop);
    }

  /* Fetch the character at BYTEPOS.  */
  if (charpos >= endpos)
    {
      /* At or past the end: report the end-of-text pseudo-character.  */
      ch = BIDI_EOB;
      *ch_len = 1;
      *nchars = 1;
      *disp_pos = endpos;
      *disp_prop = 0;
    }
  else if (charpos >= *disp_pos && *disp_prop)
    {
      ptrdiff_t disp_end_pos;

      /* We don't expect to find ourselves in the middle of a display
         property.  Hopefully, it will never be needed.  */
      if (charpos > *disp_pos)
        abort ();
      /* Text covered by `display' properties and overlays with
         display properties or display strings is handled as a single
         character that represents the entire run of characters
         covered by the display property.  */
      if (*disp_prop == 2)
        {
          /* `(space ...)' display specs are handled as paragraph
             separators for the purposes of the reordering; see UAX#9
             section 3 and clause HL1 in section 4.3 there.  */
          ch = 0x2029;
        }
      else
        {
          /* All other display specs are handled as the Unicode Object
             Replacement Character.  */
          ch = 0xFFFC;
        }
      disp_end_pos = compute_display_string_end (*disp_pos, string);
      if (disp_end_pos < 0)
        {
          /* Somebody removed the display string from the buffer
             behind our back.  Recover by processing this buffer
             position as if no display property were present there to
             begin with.  */
          *disp_prop = 0;
          goto normal_char;
        }
      /* The covered run counts as one character NCHARS positions
         long; an empty run would stall the iteration.  */
      *nchars = disp_end_pos - *disp_pos;
      if (*nchars <= 0)
        abort ();
      /* Compute the byte length of the covered run, from whichever
         kind of text we are iterating.  */
      if (string->s)
        *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
                                    disp_end_pos, string->unibyte);
      else if (STRINGP (string->lstring))
        *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
                                    bytepos, disp_end_pos, string->unibyte);
      else
        *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
    }
  else
    {
      /* An ordinary character, not covered by any display string.
         This label is also the recovery target of the branch above.  */
    normal_char:
      if (string->s)
        {

          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
              *ch_len = len;
            }
          else
            {
              ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
              *ch_len = 1;
            }
        }
      else if (STRINGP (string->lstring))
        {
          if (!string->unibyte)
            {
              ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
                                           len);
              *ch_len = len;
            }
          else
            {
              ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
              *ch_len = 1;
            }
        }
      else
        {
          ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
          *ch_len = len;
        }
      *nchars = 1;
    }

  /* If we just entered a run of characters covered by a display
     string, compute the position of the next display string.  */
  if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
      && *disp_prop)
    {
      SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
      *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
                                              disp_prop);
    }

  return ch;
}
1061
1062 \f
1063 /***********************************************************************
1064                         Determining paragraph direction
1065  ***********************************************************************/
1066
1067 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1068    Value is the non-negative length of the paragraph separator
1069    following the buffer position, -1 if position is at the beginning
1070    of a new paragraph, or -2 if position is neither at beginning nor
1071    at end of a paragraph.  */
1072 static ptrdiff_t
1073 bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
1074 {
1075   Lisp_Object sep_re;
1076   Lisp_Object start_re;
1077   ptrdiff_t val;
1078
1079   sep_re = paragraph_separate_re;
1080   start_re = paragraph_start_re;
1081
1082   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1083   if (val < 0)
1084     {
1085       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1086         val = -1;
1087       else
1088         val = -2;
1089     }
1090
1091   return val;
1092 }
1093
/* Maximum number of line starts bidi_find_paragraph_start examines
   while looking back for the beginning of a paragraph.

   On my 2005-vintage machine, searching back for paragraph start
   takes ~1 ms per line.  And bidi_paragraph_init is called 4 times
   when user types C-p.  The number below limits each call to
   bidi_paragraph_init to about 10 ms.

   NOTE(review): 7500 lines at ~1 ms per line would be ~7.5 s, not
   ~10 ms, so the per-line figure above is presumably meant to be on
   the order of 1 us -- confirm before relying on these timings.  */
#define MAX_PARAGRAPH_SEARCH 7500
1099
1100 /* Find the beginning of this paragraph by looking back in the buffer.
1101    Value is the byte position of the paragraph's beginning, or
1102    BEGV_BYTE if paragraph_start_re is still not found after looking
1103    back MAX_PARAGRAPH_SEARCH lines in the buffer.  */
1104 static ptrdiff_t
1105 bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
1106 {
1107   Lisp_Object re = paragraph_start_re;
1108   ptrdiff_t limit = ZV, limit_byte = ZV_BYTE;
1109   ptrdiff_t n = 0;
1110
1111   while (pos_byte > BEGV_BYTE
1112          && n++ < MAX_PARAGRAPH_SEARCH
1113          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1114     {
1115       /* FIXME: What if the paragraph beginning is covered by a
1116          display string?  And what if a display string covering some
1117          of the text over which we scan back includes
1118          paragraph_start_re?  */
1119       pos = find_next_newline_no_quit (pos - 1, -1);
1120       pos_byte = CHAR_TO_BYTE (pos);
1121     }
1122   if (n >= MAX_PARAGRAPH_SEARCH)
1123     pos_byte = BEGV_BYTE;
1124   return pos_byte;
1125 }
1126
/* Determine the base direction, a.k.a. base embedding level, of the
   paragraph we are about to iterate through.  If DIR is either L2R or
   R2L, just use that.  Otherwise, determine the paragraph direction
   from the first strong directional character of the paragraph.

   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
   has no strong directional characters and both DIR and
   bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
   in the buffer until a paragraph is found with a strong character,
   or until hitting BEGV.  In the latter case, fall back to L2R.  This
   flag is used in current-bidi-paragraph-direction.

   Note that this function gives the paragraph separator the same
   direction as the preceding paragraph, even though Emacs generally
   views the separator as not belonging to any paragraph.

   On return, bidi_it->paragraph_dir is L2R or R2L,
   bidi_it->level_stack[0].level is the matching base level (0 or 1),
   and bidi_line_init has been called on BIDI_IT.  The one exception
   is the early return taken while still inside a paragraph
   separator, which leaves BIDI_IT unchanged.  Aborts if called at
   end of text or before its beginning (other than for empty text),
   or if DIR is not one of L2R, R2L and NEUTRAL_DIR.  */
void
bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
{
  ptrdiff_t bytepos = bidi_it->bytepos;
  int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
  ptrdiff_t pstartbyte;
  /* Note that begbyte is a byte position, while end is a character
     position.  Yes, this is ugly, but we are trying to avoid costly
     calls to BYTE_TO_CHAR and its ilk.  */
  ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
  ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;

  /* Special case for an empty buffer. */
  if (bytepos == begbyte && bidi_it->charpos == end)
    dir = L2R;
  /* We should never be called at EOB or before BEGV.  */
  else if (bidi_it->charpos >= end || bytepos < begbyte)
    abort ();

  if (dir == L2R)
    {
      bidi_it->paragraph_dir = L2R;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == R2L)
    {
      bidi_it->paragraph_dir = R2L;
      bidi_it->new_paragraph = 0;
    }
  else if (dir == NEUTRAL_DIR)  /* P2 */
    {
      int ch;
      ptrdiff_t ch_len, nchars;
      ptrdiff_t pos, disp_pos = -1;
      int disp_prop = 0;
      bidi_type_t type;
      const unsigned char *s;

      if (!bidi_initialized)
        bidi_initialize ();

      /* If we are inside a paragraph separator, we are just waiting
         for the separator to be exhausted; use the previous paragraph
         direction.  But don't do that if we have been just reseated,
         because we need to reinitialize below in that case.  */
      if (!bidi_it->first_elt
          && bidi_it->charpos < bidi_it->separator_limit)
        return;

      /* If we are on a newline, get past it to where the next
         paragraph might start.  But don't do that at BEGV since then
         we are potentially in a new paragraph that doesn't yet
         exist.  */
      pos = bidi_it->charpos;
      s = (STRINGP (bidi_it->string.lstring)
           ? SDATA (bidi_it->string.lstring)
           : bidi_it->string.s);
      if (bytepos > begbyte
          && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
        {
          bytepos++;
          pos++;
        }

      /* We are either at the beginning of a paragraph or in the
         middle of it.  Find where this paragraph starts.  */
      if (string_p)
        {
          /* We don't support changes of paragraph direction inside a
             string.  It is treated as a single paragraph.  */
          pstartbyte = 0;
        }
      else
        pstartbyte = bidi_find_paragraph_start (pos, bytepos);
      bidi_it->separator_limit = -1;
      bidi_it->new_paragraph = 0;

      /* The following loop is run more than once only if NO_DEFAULT_P
         is non-zero, and only if we are iterating on a buffer.  */
      do {
        /* Restart the scan from the beginning of the paragraph.
           NOTE(review): for strings BYTEPOS is reset to 0 here but
           POS keeps the value computed above; presumably callers
           only get here with a string iterator at position 0 --
           confirm.  */
        bytepos = pstartbyte;
        if (!string_p)
          pos = BYTE_TO_CHAR (bytepos);
        ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop,
                              &bidi_it->string,
                              bidi_it->frame_window_p, &ch_len, &nchars);
        type = bidi_get_type (ch, NEUTRAL_DIR);

        /* Scan forward until a character that decides the paragraph
           direction is found.  Explicit embedding/override marks are
           skipped when
           bidi_ignore_explicit_marks_for_paragraph_level is
           non-zero.  POS/BYTEPOS always point just past the
           character whose type is in TYPE.  */
        for (pos += nchars, bytepos += ch_len;
             (bidi_get_category (type) != STRONG)
               || (bidi_ignore_explicit_marks_for_paragraph_level
                   && (type == RLE || type == RLO
                       || type == LRE || type == LRO));
             type = bidi_get_type (ch, NEUTRAL_DIR))
          {
            if (pos >= end)
              {
                /* Pretend there's a paragraph separator at end of
                   buffer/string.  */
                type = NEUTRAL_B;
                break;
              }
            /* In a buffer, stop at the end of this paragraph or at
               the beginning of the next one.  */
            if (!string_p
                && type == NEUTRAL_B
                && bidi_at_paragraph_end (pos, bytepos) >= -1)
              break;
            /* Fetch next character and advance to get past it.  */
            ch = bidi_fetch_char (bytepos, pos, &disp_pos,
                                  &disp_prop, &bidi_it->string,
                                  bidi_it->frame_window_p, &ch_len, &nchars);
            pos += nchars;
            bytepos += ch_len;
          }
        /* Translate the type of the deciding character, if any, into
           a paragraph direction.  If none was found, paragraph_dir
           is left as it was.  */
        if ((type == STRONG_R || type == STRONG_AL) /* P3 */
            || (!bidi_ignore_explicit_marks_for_paragraph_level
                && (type == RLO || type == RLE)))
          bidi_it->paragraph_dir = R2L;
        else if (type == STRONG_L
                 || (!bidi_ignore_explicit_marks_for_paragraph_level
                     && (type == LRO || type == LRE)))
          bidi_it->paragraph_dir = L2R;
        if (!string_p
            && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
          {
            /* If this paragraph is at BEGV, default to L2R.  */
            if (pstartbyte == BEGV_BYTE)
              bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
            else
              {
                ptrdiff_t prevpbyte = pstartbyte;
                ptrdiff_t p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;

                /* Find the beginning of the previous paragraph, if any.  */
                while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                  {
                    /* FIXME: What if p is covered by a display
                       string?  See also a FIXME inside
                       bidi_find_paragraph_start.  */
                    p--;
                    pbyte = CHAR_TO_BYTE (p);
                    prevpbyte = bidi_find_paragraph_start (p, pbyte);
                  }
                /* Retry the scan from the previous paragraph.  */
                pstartbyte = prevpbyte;
              }
          }
      } while (!string_p
               && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
    }
  else
    abort ();

  /* Contrary to UAX#9 clause P3, we only default the paragraph
     direction to L2R if we have no previous usable paragraph
     direction.  This is allowed by the HL1 clause.  */
  if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
    bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
  /* Record the base embedding level: 1 for R2L, 0 for L2R.  */
  if (bidi_it->paragraph_dir == R2L)
    bidi_it->level_stack[0].level = 1;
  else
    bidi_it->level_stack[0].level = 0;

  bidi_line_init (bidi_it);
}
1305
1306 \f
1307 /***********************************************************************
1308                  Resolving explicit and implicit levels.
1309   The rest of this file constitutes the core of the UBA implementation.
1310  ***********************************************************************/
1311
1312 static inline int
1313 bidi_explicit_dir_char (int ch)
1314 {
1315   bidi_type_t ch_type;
1316
1317   if (!bidi_initialized)
1318     abort ();
1319   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1320   return (ch_type == LRE || ch_type == LRO
1321           || ch_type == RLE || ch_type == RLO
1322           || ch_type == PDF);
1323 }
1324
/* A helper function for bidi_resolve_explicit.  It advances to the
   next character in logical order and determines the new embedding
   level and directional override, but does not take into account
   empty embeddings.

   On return, BIDI_IT describes the character just fetched: its code
   is in bidi_it->ch, its original bidi type in bidi_it->orig_type,
   and the type after processing explicit codes in bidi_it->type
   (LRE/LRO/RLE/RLO/PDF are turned into WEAK_BN, or into WEAK_EN
   where W5 applies inside an embedding known to be empty).  For the
   explicit codes, bidi_it->type_after_w1 records the original type;
   for all other characters it is set to UNKNOWN_BT.

   Value is the embedding level in effect after this character.

   bidi_it->ignore_bn_limit <= -1 means explicit codes are processed
   normally (pushing and popping levels); a value above -1 means the
   codes belong to an embedding bidi_resolve_explicit found to be
   empty, so they must not change the level stack.  */
static int
bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
{
  int curchar;
  bidi_type_t type;
  int current_level;
  int new_level;
  bidi_dir_t override;
  int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);

  /* If reseat()'ed, don't advance, so as to start iteration from the
     position where we were reseated.  bidi_it->bytepos can be less
     than BEGV_BYTE after reseat to BEGV.  */
  if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
      || bidi_it->first_elt)
    {
      bidi_it->first_elt = 0;
      /* Clamp the character position to the beginning of the text
         and recompute the byte position from it.  */
      if (string_p)
        {
          const unsigned char *p
            = (STRINGP (bidi_it->string.lstring)
               ? SDATA (bidi_it->string.lstring)
               : bidi_it->string.s);

          if (bidi_it->charpos < 0)
            bidi_it->charpos = 0;
          bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
                                               bidi_it->string.unibyte);
        }
      else
        {
          if (bidi_it->charpos < BEGV)
            bidi_it->charpos = BEGV;
          bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
        }
    }
  /* Don't move at end of buffer/string.  */
  else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
    {
      /* Advance to the next character, skipping characters covered by
         display strings (nchars > 1).  */
      if (bidi_it->nchars <= 0)
        abort ();
      bidi_it->charpos += bidi_it->nchars;
      if (bidi_it->ch_len == 0)
        abort ();
      bidi_it->bytepos += bidi_it->ch_len;
    }

  /* The level and override in effect before this character.  */
  current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
  override = bidi_it->level_stack[bidi_it->stack_idx].override;
  new_level = current_level;

  if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
    {
      /* At end of text: same values bidi_fetch_char would produce.  */
      curchar = BIDI_EOB;
      bidi_it->ch_len = 1;
      bidi_it->nchars = 1;
      bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
      bidi_it->disp_prop = 0;
    }
  else
    {
      /* Fetch the character at BYTEPOS.  If it is covered by a
         display string, treat the entire run of covered characters as
         a single character (u+FFFC, or u+2029 for `(space ...)'
         display specs).  */
      curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
                                 &bidi_it->disp_pos, &bidi_it->disp_prop,
                                 &bidi_it->string, bidi_it->frame_window_p,
                                 &bidi_it->ch_len, &bidi_it->nchars);
    }
  bidi_it->ch = curchar;

  /* Don't apply directional override here, as all the types we handle
     below will not be affected by the override anyway, and we need
     the original type unaltered.  The override will be applied in
     bidi_resolve_weak.  */
  type = bidi_get_type (curchar, NEUTRAL_DIR);
  bidi_it->orig_type = type;
  bidi_check_type (bidi_it->orig_type);

  if (type != PDF)
    bidi_it->prev_was_pdf = 0;

  bidi_it->type_after_w1 = UNKNOWN_BT;

  switch (type)
    {
      case RLE: /* X2 */
      case RLO: /* X4 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 4)
              {
                /* Compute the least odd embedding level greater than
                   the current level.  */
                new_level = ((current_level + 1) & ~1) + 1;
                if (bidi_it->type_after_w1 == RLE)
                  override = NEUTRAL_DIR;
                else
                  override = R2L;
                /* Start counting invalid levels once this threshold
                   level is reached; see the commentary about
                   invalid_rl_levels below.  */
                if (current_level == BIDI_MAXLEVEL - 4)
                  bidi_it->invalid_rl_levels = 0;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                /* The level would be too high: count the code as
                   invalid instead of pushing.  */
                bidi_it->invalid_levels++;
                /* See the commentary about invalid_rl_levels below.  */
                if (bidi_it->invalid_rl_levels < 0)
                  bidi_it->invalid_rl_levels = 0;
                bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case LRE: /* X3 */
      case LRO: /* X5 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            if (current_level <= BIDI_MAXLEVEL - 5)
              {
                /* Compute the least even embedding level greater than
                   the current level.  */
                new_level = ((current_level + 2) & ~1);
                if (bidi_it->type_after_w1 == LRE)
                  override = NEUTRAL_DIR;
                else
                  override = L2R;
                bidi_push_embedding_level (bidi_it, new_level, override);
              }
            else
              {
                bidi_it->invalid_levels++;
                /* invalid_rl_levels counts invalid levels encountered
                   while the embedding level was already too high for
                   LRE/LRO, but not for RLE/RLO.  That is because
                   there may be exactly one PDF which we should not
                   ignore even though invalid_levels is non-zero.
                   invalid_rl_levels helps to know what PDF is
                   that.  */
                if (bidi_it->invalid_rl_levels >= 0)
                  bidi_it->invalid_rl_levels++;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      case PDF: /* X7 */
        bidi_it->type_after_w1 = type;
        bidi_check_type (bidi_it->type_after_w1);
        type = WEAK_BN; /* X9/Retaining */
        if (bidi_it->ignore_bn_limit <= -1)
          {
            /* invalid_rl_levels == 0 identifies the one PDF that
               must be honored even though invalid_levels may be
               non-zero (see the commentary in the LRE/LRO case).  */
            if (!bidi_it->invalid_rl_levels)
              {
                new_level = bidi_pop_embedding_level (bidi_it);
                bidi_it->invalid_rl_levels = -1;
                if (bidi_it->invalid_levels)
                  bidi_it->invalid_levels--;
                /* else nothing: UAX#9 says to ignore invalid PDFs */
              }
            /* With no invalid levels pending, this PDF terminates a
               valid embedding; otherwise it only cancels one of the
               invalid ones.  */
            if (!bidi_it->invalid_levels)
              new_level = bidi_pop_embedding_level (bidi_it);
            else
              {
                bidi_it->invalid_levels--;
                bidi_it->invalid_rl_levels--;
              }
          }
        else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
                 || (bidi_it->next_en_pos > bidi_it->charpos
                     && bidi_it->next_en_type == WEAK_EN))
          type = WEAK_EN;
        break;
      default:
        /* Nothing.  */
        break;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  return new_level;
}
1525
1526 /* Given an iterator state in BIDI_IT, advance one character position
1527    in the buffer/string to the next character (in the logical order),
1528    resolve any explicit embeddings and directional overrides, and
1529    return the embedding level of the character after resolving
1530    explicit directives and ignoring empty embeddings.  */
1531 static int
1532 bidi_resolve_explicit (struct bidi_it *bidi_it)
1533 {
1534   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1535   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1536   ptrdiff_t eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1537   const unsigned char *s
1538     = (STRINGP (bidi_it->string.lstring)
1539        ? SDATA (bidi_it->string.lstring)
1540        : bidi_it->string.s);
1541
1542   if (prev_level < new_level
1543       && bidi_it->type == WEAK_BN
1544       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1545       && bidi_it->charpos < eob         /* not already at EOB */
1546       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1547                                                    + bidi_it->ch_len, s,
1548                                                    bidi_it->string.unibyte)))
1549     {
1550       /* Avoid pushing and popping embedding levels if the level run
1551          is empty, as this breaks level runs where it shouldn't.
1552          UAX#9 removes all the explicit embedding and override codes,
1553          so empty embeddings disappear without a trace.  We need to
1554          behave as if we did the same.  */
1555       struct bidi_it saved_it;
1556       int level = prev_level;
1557
1558       bidi_copy_it (&saved_it, bidi_it);
1559
1560       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1561                                                        + bidi_it->ch_len, s,
1562                                                        bidi_it->string.unibyte)))
1563         {
1564           /* This advances to the next character, skipping any
1565              characters covered by display strings.  */
1566           level = bidi_resolve_explicit_1 (bidi_it);
1567           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1568              a pointer to its data is no longer valid.  */
1569           if (STRINGP (bidi_it->string.lstring))
1570             s = SDATA (bidi_it->string.lstring);
1571         }
1572
1573       if (bidi_it->nchars <= 0)
1574         abort ();
1575       if (level == prev_level)  /* empty embedding */
1576         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1577       else                      /* this embedding is non-empty */
1578         saved_it.ignore_bn_limit = -2;
1579
1580       bidi_copy_it (bidi_it, &saved_it);
1581       if (bidi_it->ignore_bn_limit > -1)
1582         {
1583           /* We pushed a level, but we shouldn't have.  Undo that. */
1584           if (!bidi_it->invalid_rl_levels)
1585             {
1586               new_level = bidi_pop_embedding_level (bidi_it);
1587               bidi_it->invalid_rl_levels = -1;
1588               if (bidi_it->invalid_levels)
1589                 bidi_it->invalid_levels--;
1590             }
1591           if (!bidi_it->invalid_levels)
1592             new_level = bidi_pop_embedding_level (bidi_it);
1593           else
1594             {
1595               bidi_it->invalid_levels--;
1596               bidi_it->invalid_rl_levels--;
1597             }
1598         }
1599     }
1600
1601   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1602     {
1603       bidi_set_paragraph_end (bidi_it);
1604       /* This is needed by bidi_resolve_weak below, and in L1.  */
1605       bidi_it->type_after_w1 = bidi_it->type;
1606       bidi_check_type (bidi_it->type_after_w1);
1607     }
1608
1609   return new_level;
1610 }
1611
/* Advance in the buffer/string, resolve weak types and return the
   type of the next character after weak type resolution.

   BIDI_IT is moved to the next character by bidi_resolve_explicit;
   the rules marked W1 through W7 in the comments below are then
   applied to the type found there.  The result is stored in
   BIDI_IT->type and returned.  BIDI_IT->type_after_w1 is filled in
   along the way if it is still UNKNOWN_BT by then.

   Where W4 and W5 need to know what follows the current character,
   the iterator is copied aside, advanced, and copied back, so on
   return BIDI_IT is positioned on the character whose type is
   returned.  */
static bidi_type_t
bidi_resolve_weak (struct bidi_it *bidi_it)
{
  bidi_type_t type;
  bidi_dir_t override;
  /* Embedding level on top of the stack before we advance.  */
  int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
  /* This call is what advances BIDI_IT to the next character.  */
  int new_level  = bidi_resolve_explicit (bidi_it);
  int next_char;
  bidi_type_t type_of_next;
  /* Holds a copy of *BIDI_IT while we look ahead in W4 and W5.  */
  struct bidi_it saved_it;
  /* End of text: string.schars when iterating over a string, ZV
     otherwise.  */
  ptrdiff_t eob
    = ((STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
       ? bidi_it->string.schars : ZV);

  type = bidi_it->type;
  override = bidi_it->level_stack[bidi_it->stack_idx].override;

  /* The code below does not expect any of these types to be left in
     bidi_it->type by bidi_resolve_explicit.  */
  if (type == UNKNOWN_BT
      || type == LRE
      || type == LRO
      || type == RLE
      || type == RLO
      || type == PDF)
    abort ();

  if (new_level != prev_level
      || bidi_it->type == NEUTRAL_B)
    {
      /* We've got a new embedding level run, compute the directional
         type of sor and initialize per-run variables (UAX#9, clause
         X10).  */
      bidi_set_sor_type (bidi_it, prev_level, new_level);
    }
  else if (type == NEUTRAL_S || type == NEUTRAL_WS
           || type == WEAK_BN || type == STRONG_AL)
    bidi_it->type_after_w1 = type;      /* needed in L1 */
  bidi_check_type (bidi_it->type_after_w1);

  /* Level and directional override status are already recorded in
     bidi_it, and do not need any change; see X6.  */
  if (override == R2L)          /* X6 */
    type = STRONG_R;
  else if (override == L2R)
    type = STRONG_L;
  else
    {
      /* No directional override is in effect, so apply W1 to W5.  */
      if (type == WEAK_NSM)     /* W1 */
        {
          /* Note that we don't need to consider the case where the
             prev character has its type overridden by an RLO or LRO,
             because then either the type of this NSM would have been
             also overridden, or the previous character is outside the
             current level run, and thus not relevant to this NSM.
             This is why NSM gets the type_after_w1 of the previous
             character.  */
          if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
              /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
              && bidi_it->prev.type_after_w1 != NEUTRAL_B)
            type = bidi_it->prev.type_after_w1;
          else if (bidi_it->sor == R2L)
            type = STRONG_R;
          else if (bidi_it->sor == L2R)
            type = STRONG_L;
          else /* shouldn't happen! */
            abort ();
        }
      /* W1 is a separate `if', not part of the chain below, so the
         type an NSM has just received goes through W2..W5 as well.  */
      if (type == WEAK_EN       /* W2 */
          && bidi_it->last_strong.type_after_w1 == STRONG_AL)
        type = WEAK_AN;
      else if (type == STRONG_AL) /* W3 */
        type = STRONG_R;
      /* W4 applies only to an ES preceded by an EN, or a CS preceded
         by an EN or an AN.  A preceding EN counts only if it was an
         EN originally (orig_type) as well as after W1.  */
      else if ((type == WEAK_ES /* W4 */
                && bidi_it->prev.type_after_w1 == WEAK_EN
                && bidi_it->prev.orig_type == WEAK_EN)
               || (type == WEAK_CS
                   && ((bidi_it->prev.type_after_w1 == WEAK_EN
                        && bidi_it->prev.orig_type == WEAK_EN)
                       || bidi_it->prev.type_after_w1 == WEAK_AN)))
        {
          /* Text to fetch the next character from: the data of the
             Lisp string if we iterate over one, else string.s.  */
          const unsigned char *s
            = (STRINGP (bidi_it->string.lstring)
               ? SDATA (bidi_it->string.lstring)
               : bidi_it->string.s);

          /* Peek at the character right after this one, or use
             BIDI_EOB if this one is the last.  */
          next_char = (bidi_it->charpos + bidi_it->nchars >= eob
                       ? BIDI_EOB
                       : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len,
                                           s, bidi_it->string.unibyte));
          type_of_next = bidi_get_type (next_char, override);

          /* If what follows is a BN or an explicit formatting code,
             step over such characters for as long as the embedding
             level stays at new_level, and use the type of the
             character where that stops.  The iterator is restored
             afterwards.  */
          if (type_of_next == WEAK_BN
              || bidi_explicit_dir_char (next_char))
            {
              bidi_copy_it (&saved_it, bidi_it);
              while (bidi_resolve_explicit (bidi_it) == new_level
                     && bidi_it->type == WEAK_BN)
                ;
              type_of_next = bidi_it->type;
              bidi_copy_it (bidi_it, &saved_it);
            }

          /* If the next character is EN, but the last strong-type
             character is AL, that next EN will be changed to AN when
             we process it in W2 above.  So in that case, this ES
             should not be changed into EN.  */
          if (type == WEAK_ES
              && type_of_next == WEAK_EN
              && bidi_it->last_strong.type_after_w1 != STRONG_AL)
            type = WEAK_EN;
          else if (type == WEAK_CS)
            {
              if (bidi_it->prev.type_after_w1 == WEAK_AN
                  && (type_of_next == WEAK_AN
                      /* If the next character is EN, but the last
                         strong-type character is AL, that EN will
                         later be changed to AN when we process it in
                         W2 above.  So in that case this CS is in
                         effect between two ANs, and is changed into
                         AN.  */
                      || (type_of_next == WEAK_EN
                          && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
                type = WEAK_AN;
              /* CS between two ENs, where the following EN will not
                 be turned into AN by W2.  */
              else if (bidi_it->prev.type_after_w1 == WEAK_EN
                       && type_of_next == WEAK_EN
                       && bidi_it->last_strong.type_after_w1 != STRONG_AL)
                type = WEAK_EN;
            }
        }
      else if (type == WEAK_ET  /* W5: ET with EN before or after it */
               || type == WEAK_BN)      /* W5/Retaining */
        {
          if (bidi_it->prev.type_after_w1 == WEAK_EN) /* ET/BN w/EN before it */
            type = WEAK_EN;
          /* A previous forward search, whose result is recorded in
             next_en_pos and next_en_type, ended beyond this
             character, so reuse its result instead of searching
             again.  */
          else if (bidi_it->next_en_pos > bidi_it->charpos
                   && bidi_it->next_en_type != WEAK_BN)
            {
              if (bidi_it->next_en_type == WEAK_EN) /* ET/BN with EN after it */
                type = WEAK_EN;
            }
          /* Otherwise search forward, unless next_en_pos is negative,
             which is the value stored below once the search has hit
             the end of the paragraph.  */
          else if (bidi_it->next_en_pos >=0)
            {
              /* Position where the search ends; by default, that of
                 the very next character.  */
              ptrdiff_t en_pos = bidi_it->charpos + bidi_it->nchars;
              const unsigned char *s = (STRINGP (bidi_it->string.lstring)
                                        ? SDATA (bidi_it->string.lstring)
                                        : bidi_it->string.s);

              if (bidi_it->nchars <= 0)
                abort ();
              next_char
                = (bidi_it->charpos + bidi_it->nchars >= eob
                   ? BIDI_EOB
                   : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
                                       bidi_it->string.unibyte));
              type_of_next = bidi_get_type (next_char, override);

              /* Step over any ETs, BNs and explicit formatting codes
                 that follow, for as long as the embedding level stays
                 at new_level, then restore the iterator.  */
              if (type_of_next == WEAK_ET
                  || type_of_next == WEAK_BN
                  || bidi_explicit_dir_char (next_char))
                {
                  bidi_copy_it (&saved_it, bidi_it);
                  while (bidi_resolve_explicit (bidi_it) == new_level
                         && (bidi_it->type == WEAK_BN
                             || bidi_it->type == WEAK_ET))
                    ;
                  type_of_next = bidi_it->type;
                  en_pos = bidi_it->charpos;
                  bidi_copy_it (bidi_it, &saved_it);
                }
              /* Remember this position, to speed up processing of the
                 next ETs.  */
              bidi_it->next_en_pos = en_pos;
              if (type_of_next == WEAK_EN)
                {
                  /* If the last strong character is AL, the EN we've
                     found will become AN when we get to it (W2). */
                  if (bidi_it->last_strong.type_after_w1 == STRONG_AL)
                    type_of_next = WEAK_AN;
                  else if (type == WEAK_BN)
                    type = NEUTRAL_ON; /* W6/Retaining */
                  else
                    type = WEAK_EN;
                }
              else if (type_of_next == NEUTRAL_B)
                /* Record the fact that there are no more ENs from
                   here to the end of paragraph, to avoid entering the
                   loop above ever again in this paragraph.  */
                bidi_it->next_en_pos = -1;
              /* Record the type of the character where we ended our search.  */
              bidi_it->next_en_type = type_of_next;
            }
        }
    }

  /* Separators and terminators that are still unresolved at this
     point become ON, and so does a BN whose predecessor was one of
     them.  */
  if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
      || (type == WEAK_BN
          && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
              || bidi_it->prev.type_after_w1 == WEAK_ES
              || bidi_it->prev.type_after_w1 == WEAK_ET)))
    type = NEUTRAL_ON;

  /* Store the type we've got so far, before we clobber it with strong
     types in W7 and while resolving neutral types.  But leave alone
     the original types that were recorded above, because we will need
     them for the L1 clause.  */
  if (bidi_it->type_after_w1 == UNKNOWN_BT)
    bidi_it->type_after_w1 = type;
  bidi_check_type (bidi_it->type_after_w1);

  /* An EN becomes L if the last strong character was L, or if no
     strong character has been recorded yet and sor is L2R.  */
  if (type == WEAK_EN)  /* W7 */
    {
      if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
          || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
        type = STRONG_L;
    }

  bidi_it->type = type;
  bidi_check_type (bidi_it->type);
  return type;
}
1832
1833 /* Resolve the type of a neutral character according to the type of
1834    surrounding strong text and the current embedding level.  */
1835 static inline bidi_type_t
1836 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1837 {
1838   /* N1: European and Arabic numbers are treated as though they were R.  */
1839   if (next_type == WEAK_EN || next_type == WEAK_AN)
1840     next_type = STRONG_R;
1841   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1842     prev_type = STRONG_R;
1843
1844   if (next_type == prev_type)   /* N1 */
1845     return next_type;
1846   else if ((lev & 1) == 0)      /* N2 */
1847     return STRONG_L;
1848   else
1849     return STRONG_R;
1850 }
1851
1852 static bidi_type_t
1853 bidi_resolve_neutral (struct bidi_it *bidi_it)
1854 {
1855   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1856   bidi_type_t type = bidi_resolve_weak (bidi_it);
1857   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1858
1859   if (!(type == STRONG_R
1860         || type == STRONG_L
1861         || type == WEAK_BN
1862         || type == WEAK_EN
1863         || type == WEAK_AN
1864         || type == NEUTRAL_B
1865         || type == NEUTRAL_S
1866         || type == NEUTRAL_WS
1867         || type == NEUTRAL_ON))
1868     abort ();
1869
1870   if ((type != NEUTRAL_B /* Don't risk entering the long loop below if
1871                             we are already at paragraph end.  */
1872        && bidi_get_category (type) == NEUTRAL)
1873       || (type == WEAK_BN && prev_level == current_level))
1874     {
1875       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1876         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1877                                        bidi_it->next_for_neutral.type,
1878                                        current_level);
1879       /* The next two "else if" clauses are shortcuts for the
1880          important special case when we have a long sequence of
1881          neutral or WEAK_BN characters, such as whitespace or nulls or
1882          other control characters, on the base embedding level of the
1883          paragraph, and that sequence goes all the way to the end of
1884          the paragraph and follows a character whose resolved
1885          directionality is identical to the base embedding level.
1886          (This is what happens in a buffer with plain L2R text that
1887          happens to include long sequences of control characters.)  By
1888          virtue of N1, the result of examining this long sequence will
1889          always be either STRONG_L or STRONG_R, depending on the base
1890          embedding level.  So we use this fact directly instead of
1891          entering the expensive loop in the "else" clause.  */
1892       else if (current_level == 0
1893                && bidi_it->prev_for_neutral.type == STRONG_L
1894                && !bidi_explicit_dir_char (bidi_it->ch))
1895         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1896                                        STRONG_L, current_level);
1897       else if (/* current level is 1 */
1898                current_level == 1
1899                /* base embedding level is also 1 */
1900                && bidi_it->level_stack[0].level == 1
1901                /* previous character is one of those considered R for
1902                   the purposes of W5 */
1903                && (bidi_it->prev_for_neutral.type == STRONG_R
1904                    || bidi_it->prev_for_neutral.type == WEAK_EN
1905                    || bidi_it->prev_for_neutral.type == WEAK_AN)
1906                && !bidi_explicit_dir_char (bidi_it->ch))
1907         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1908                                        STRONG_R, current_level);
1909       else
1910         {
1911           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1912              the assumption of batch-style processing; see clauses W4,
1913              W5, and especially N1, which require to look far forward
1914              (as well as back) in the buffer/string.  May the fleas of
1915              a thousand camels infest the armpits of those who design
1916              supposedly general-purpose algorithms by looking at their
1917              own implementations, and fail to consider other possible
1918              implementations!  */
1919           struct bidi_it saved_it;
1920           bidi_type_t next_type;
1921
1922           if (bidi_it->scan_dir == -1)
1923             abort ();
1924
1925           bidi_copy_it (&saved_it, bidi_it);
1926           /* Scan the text forward until we find the first non-neutral
1927              character, and then use that to resolve the neutral we
1928              are dealing with now.  We also cache the scanned iterator
1929              states, to salvage some of the effort later.  */
1930           bidi_cache_iterator_state (bidi_it, 0);
1931           do {
1932             /* Record the info about the previous character, so that
1933                it will be cached below with this state.  */
1934             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1935                 && bidi_it->type != WEAK_BN)
1936               bidi_remember_char (&bidi_it->prev, bidi_it);
1937             type = bidi_resolve_weak (bidi_it);
1938             /* Paragraph separators have their levels fully resolved
1939                at this point, so cache them as resolved.  */
1940             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1941             /* FIXME: implement L1 here, by testing for a newline and
1942                resetting the level for any sequence of whitespace
1943                characters adjacent to it.  */
1944           } while (!(type == NEUTRAL_B
1945                      || (type != WEAK_BN
1946                          && bidi_get_category (type) != NEUTRAL)
1947                      /* This is all per level run, so stop when we
1948                         reach the end of this level run.  */
1949                      || (bidi_it->level_stack[bidi_it->stack_idx].level
1950                          != current_level)));
1951
1952           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1953
1954           switch (type)
1955             {
1956               case STRONG_L:
1957               case STRONG_R:
1958               case STRONG_AL:
1959                 /* Actually, STRONG_AL cannot happen here, because
1960                    bidi_resolve_weak converts it to STRONG_R, per W3.  */
1961                 xassert (type != STRONG_AL);
1962                 next_type = type;
1963                 break;
1964               case WEAK_EN:
1965               case WEAK_AN:
1966                 /* N1: ``European and Arabic numbers are treated as
1967                    though they were R.''  */
1968                 next_type = STRONG_R;
1969                 break;
1970               case WEAK_BN:
1971                 if (!bidi_explicit_dir_char (bidi_it->ch))
1972                   abort ();             /* can't happen: BNs are skipped */
1973                 /* FALLTHROUGH */
1974               case NEUTRAL_B:
1975                 /* Marched all the way to the end of this level run.
1976                    We need to use the eor type, whose information is
1977                    stored by bidi_set_sor_type in the prev_for_neutral
1978                    member.  */
1979                 if (saved_it.type != WEAK_BN
1980                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1981                   next_type = bidi_it->prev_for_neutral.type;
1982                 else
1983                   {
1984                     /* This is a BN which does not adjoin neutrals.
1985                        Leave its type alone.  */
1986                     bidi_copy_it (bidi_it, &saved_it);
1987                     return bidi_it->type;
1988                   }
1989                 break;
1990               default:
1991                 abort ();
1992             }
1993           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1994                                          next_type, current_level);
1995           saved_it.next_for_neutral.type = next_type;
1996           saved_it.type = type;
1997           bidi_check_type (next_type);
1998           bidi_check_type (type);
1999           bidi_copy_it (bidi_it, &saved_it);
2000         }
2001     }
2002   return type;
2003 }
2004
2005 /* Given an iterator state in BIDI_IT, advance one character position
2006    in the buffer/string to the next character (in the logical order),
2007    resolve the bidi type of that next character, and return that
2008    type.  */
2009 static bidi_type_t
2010 bidi_type_of_next_char (struct bidi_it *bidi_it)
2011 {
2012   bidi_type_t type;
2013
2014   /* This should always be called during a forward scan.  */
2015   if (bidi_it->scan_dir != 1)
2016     abort ();
2017
2018   /* Reset the limit until which to ignore BNs if we step out of the
2019      area where we found only empty levels.  */
2020   if ((bidi_it->ignore_bn_limit > -1
2021        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
2022       || (bidi_it->ignore_bn_limit == -2
2023           && !bidi_explicit_dir_char (bidi_it->ch)))
2024     bidi_it->ignore_bn_limit = -1;
2025
2026   type = bidi_resolve_neutral (bidi_it);
2027
2028   return type;
2029 }
2030
/* Given an iterator state BIDI_IT, advance one character position in
   the buffer/string to the next character (in the current scan
   direction), resolve the embedding and implicit levels of that next
   character, and return the resulting level.

   The level is also stored in BIDI_IT->resolved_level.  If the cache
   holds a fully resolved state for the next position, that state's
   level is returned and nothing is resolved anew.  When scanning
   forward with BIDI_IT already at the end of the text, the current
   resolved_level is returned and BIDI_IT does not move.  When
   scanning backward (scan_dir == -1), the level must come from the
   cache; the function aborts otherwise.  */
static int
bidi_level_of_next_char (struct bidi_it *bidi_it)
{
  bidi_type_t type;
  /* prev_level stays at -1 unless we scan forward.  */
  int level, prev_level = -1;
  struct bidi_saved_info next_for_neutral;
  /* -2 is below the smallest position accepted by the cache lookup
     below, so the lookup is skipped unless this is assigned.  */
  ptrdiff_t next_char_pos = -2;

  if (bidi_it->scan_dir == 1)
    {
      /* End of text: string.schars when iterating over a string, ZV
         otherwise.  */
      ptrdiff_t eob
        = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
           ? bidi_it->string.schars : ZV);

      /* There's no sense in trying to advance if we hit end of text.  */
      if (bidi_it->charpos >= eob)
        return bidi_it->resolved_level;

      /* Record the info about the previous character.  */
      if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
          && bidi_it->type != WEAK_BN)
        bidi_remember_char (&bidi_it->prev, bidi_it);
      /* The last strong character is tracked by its type after W1,
         so that an AL is remembered as AL.  */
      if (bidi_it->type_after_w1 == STRONG_R
          || bidi_it->type_after_w1 == STRONG_L
          || bidi_it->type_after_w1 == STRONG_AL)
        bidi_remember_char (&bidi_it->last_strong, bidi_it);
      /* FIXME: it sounds like we don't need both prev and
         prev_for_neutral members, but I'm leaving them both for now.  */
      if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
          || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
        bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);

      /* If we overstepped the characters used for resolving neutrals
         and whitespace, invalidate their info in the iterator.  */
      if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
        bidi_it->next_for_neutral.type = UNKNOWN_BT;
      /* A negative next_en_pos is left alone here.  */
      if (bidi_it->next_en_pos >= 0
          && bidi_it->charpos >= bidi_it->next_en_pos)
        {
          bidi_it->next_en_pos = 0;
          bidi_it->next_en_type = UNKNOWN_BT;
        }
      if (bidi_it->next_for_ws.type != UNKNOWN_BT
          && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
        bidi_it->next_for_ws.type = UNKNOWN_BT;

      /* This must be taken before we fill the iterator with the info
         about the next char.  If we scan backwards, the iterator
         state must be already cached, so there's no need to know the
         embedding level of the previous character, since we will be
         returning to our caller shortly.  */
      prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
    }
  /* Keep a copy of this member; it is put back below if the state
     taken from the cache has no next_for_neutral of its own.  */
  next_for_neutral = bidi_it->next_for_neutral;

  /* Perhaps the character we want is already cached.  If it is, the
     call to bidi_cache_find below will return a type other than
     UNKNOWN_BT.  */
  if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
    {
      /* First valid character position: 0 in a string, 1 otherwise.  */
      int bob = ((bidi_it->string.s || STRINGP (bidi_it->string.lstring))
                 ? 0 : 1);
      if (bidi_it->scan_dir > 0)
        {
          if (bidi_it->nchars <= 0)
            abort ();
          next_char_pos = bidi_it->charpos + bidi_it->nchars;
        }
      else if (bidi_it->charpos >= bob)
        /* Implementation note: we allow next_char_pos to be as low as
           0 for buffers or -1 for strings, and that is okay because
           that's the "position" of the sentinel iterator state we
           cached at the beginning of the iteration.  */
        next_char_pos = bidi_it->charpos - 1;
      if (next_char_pos >= bob - 1)
        type = bidi_cache_find (next_char_pos, -1, bidi_it);
      else
        type = UNKNOWN_BT;
    }
  else
    type = UNKNOWN_BT;
  if (type != UNKNOWN_BT)
    {
      /* Don't lose the information for resolving neutrals!  The
         cached states could have been cached before their
         next_for_neutral member was computed.  If we are on our way
         forward, we can simply take the info from the previous
         state.  */
      if (bidi_it->scan_dir == 1
          && bidi_it->next_for_neutral.type == UNKNOWN_BT)
        bidi_it->next_for_neutral = next_for_neutral;

      /* If resolved_level is -1, it means this state was cached
         before it was completely resolved, so we cannot return
         it.  */
      if (bidi_it->resolved_level != -1)
        return bidi_it->resolved_level;
    }
  if (bidi_it->scan_dir == -1)
    /* If we are going backwards, the iterator state is already cached
       from previous scans, and should be fully resolved.  */
    abort ();

  /* Nothing usable in the cache, so advance and resolve the type the
     hard way.  If the cache did supply a type, but no resolved
     level, continue with that type.  */
  if (type == UNKNOWN_BT)
    type = bidi_type_of_next_char (bidi_it);

  /* For a paragraph separator, resolved_level is returned as it
     stands.  NOTE(review): presumably it was already set while the
     separator was being processed; confirm against
     bidi_set_paragraph_end.  */
  if (type == NEUTRAL_B)
    return bidi_it->resolved_level;

  level = bidi_it->level_stack[bidi_it->stack_idx].level;
  if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
      || (type == WEAK_BN && prev_level == level))
    {
      if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
        abort ();

      /* If the cached state shows a neutral character, it was not
         resolved by bidi_resolve_neutral, so do it now.  */
      type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
                                     bidi_it->next_for_neutral.type,
                                     level);
    }

  /* By now only strong types, numbers and BN should be left.  */
  if (!(type == STRONG_R
        || type == STRONG_L
        || type == WEAK_BN
        || type == WEAK_EN
        || type == WEAK_AN))
    abort ();
  bidi_it->type = type;
  bidi_check_type (bidi_it->type);

  /* For L1 below, we need to know, for each WS character, whether
     it belongs to a sequence of WS characters preceding a newline
     or a TAB or a paragraph separator.  */
  if (bidi_it->orig_type == NEUTRAL_WS
      && bidi_it->next_for_ws.type == UNKNOWN_BT)
    {
      /* The search below works on copies of the iterator's position
         and string data, so BIDI_IT itself does not move.  */
      int ch;
      ptrdiff_t clen = bidi_it->ch_len;
      ptrdiff_t bpos = bidi_it->bytepos;
      ptrdiff_t cpos = bidi_it->charpos;
      ptrdiff_t disp_pos = bidi_it->disp_pos;
      ptrdiff_t nc = bidi_it->nchars;
      struct bidi_string_data bs = bidi_it->string;
      bidi_type_t chtype;
      int fwp = bidi_it->frame_window_p;
      int dpp = bidi_it->disp_prop;

      if (bidi_it->nchars <= 0)
        abort ();
      /* Fetch characters until one is found that is not WS, BN or an
         explicit formatting code.  A newline or the end of text
         counts as a paragraph separator.  */
      do {
        ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
                              fwp, &clen, &nc);
        if (ch == '\n' || ch == BIDI_EOB)
          chtype = NEUTRAL_B;
        else
          chtype = bidi_get_type (ch, NEUTRAL_DIR);
      } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
               || bidi_explicit_dir_char (ch)); /* L1/Retaining */
      /* Remember the type and position of the character found, so
         that the WS characters up to it need not search again.  */
      bidi_it->next_for_ws.type = chtype;
      bidi_check_type (bidi_it->next_for_ws.type);
      bidi_it->next_for_ws.charpos = cpos;
      bidi_it->next_for_ws.bytepos = bpos;
    }

  /* Resolve implicit levels, with a twist: PDFs get the embedding
     level of the embedding they terminate.  See below for the
     reason.  */
  if (bidi_it->orig_type == PDF
      /* Don't do this if this formatting code didn't change the
         embedding level due to invalid or empty embeddings.  */
      && prev_level != level)
    {
      /* Don't look in UAX#9 for the reason for this: it's our own
         private quirk.  The reason is that we want the formatting
         codes to be delivered so that they bracket the text of their
         embedding.  For example, given the text

             {RLO}teST{PDF}

         we want it to be displayed as

             {PDF}STet{RLO}

         not as

             STet{RLO}{PDF}

         which will result because we bump up the embedding level as
         soon as we see the RLO and pop it as soon as we see the PDF,
         so RLO itself has the same embedding level as "teST", and
         thus would be normally delivered last, just before the PDF.
         The switch below fiddles with the level of PDF so that this
         ugly side effect does not happen.

         (This is, of course, only important if the formatting codes
         are actually displayed, but Emacs does need to display them
         if the user wants to.)  */
      level = prev_level;
    }
  /* Paragraph and segment separators, newline and end of text, and
     whitespace that runs up to a paragraph or segment separator, all
     get the level found at the bottom of the level stack.  */
  else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
           || bidi_it->orig_type == NEUTRAL_S
           || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
           || (bidi_it->orig_type == NEUTRAL_WS
               && (bidi_it->next_for_ws.type == NEUTRAL_B
                   || bidi_it->next_for_ws.type == NEUTRAL_S)))
    level = bidi_it->level_stack[0].level;
  /* On an even level, R goes up one level, and numbers go up two.  */
  else if ((level & 1) == 0) /* I1 */
    {
      if (type == STRONG_R)
        level++;
      else if (type == WEAK_EN || type == WEAK_AN)
        level += 2;
    }
  /* On an odd level, L and numbers go up one level.  */
  else                  /* I2 */
    {
      if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
        level++;
    }

  bidi_it->resolved_level = level;
  return level;
}
2259
2260 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2261    non-zero, we are at the end of a level, and we need to prepare to
2262    resume the scan of the lower level.
2263
2264    If this level's other edge is cached, we simply jump to it, filling
2265    the iterator structure with the iterator state on the other edge.
2266    Otherwise, we walk the buffer or string until we come back to the
2267    same level as LEVEL.
2268
2269    Note: we are not talking here about a ``level run'' in the UAX#9
2270    sense of the term, but rather about a ``level'' which includes
2271    all the levels higher than it.  In other words, given the levels
2272    like this:
2273
2274          11111112222222333333334443343222222111111112223322111
2275                 A      B                    C
2276
2277    and assuming we are at point A scanning left to right, this
2278    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2279    at point B.  */
2280 static void
2281 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2282 {
2283   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2284   ptrdiff_t idx;
2285
2286   /* Try the cache first.  */
2287   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2288       >= bidi_cache_start)
2289     bidi_cache_fetch_state (idx, bidi_it);
2290   else
2291     {
2292       int new_level;
2293
2294       if (end_flag)
2295         abort (); /* if we are at end of level, its edges must be cached */
2296
2297       bidi_cache_iterator_state (bidi_it, 1);
2298       do {
2299         new_level = bidi_level_of_next_char (bidi_it);
2300         bidi_cache_iterator_state (bidi_it, 1);
2301       } while (new_level >= level);
2302     }
2303 }
2304
     /* Move BIDI_IT, in place, to the next character in visual order.

        The level returned for the next character in the current scan
        direction is compared with the resolved level BIDI_IT had before.
        When the two differ, the iterator jumps (or walks) to the other
        edge of the level and reverses its scan direction; this is how
        clause L2 of UAX#9 is implemented here.  Iterator states are
        cached along the way, and the cache is reset once a forward scan
        is at the paragraph's base embedding level and beyond the last
        cached position.

        Aborts if BIDI_IT's charpos or bytepos is negative.  A scan_dir
        of zero is treated as 1.  */
2305 void
2306 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2307 {
2308   int old_level, new_level, next_level;
2309   struct bidi_it sentinel;
2310   struct gcpro gcpro1;  /* not named below; presumably used by GCPRO1 */
2311
2312   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2313     abort ();
2314
2315   if (bidi_it->scan_dir == 0)
2316     {
2317       bidi_it->scan_dir = 1;    /* default to logical order */
2318     }
2319
2320   /* The code below can call eval, and thus cause GC.  If we are
2321      iterating a Lisp string, make sure it won't be GCed.  */
2322   if (STRINGP (bidi_it->string.lstring))
2323     GCPRO1 (bidi_it->string.lstring);
2324
2325   /* If we just passed a newline, initialize for the next line.  */
2326   if (!bidi_it->first_elt
2327       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2328     bidi_line_init (bidi_it);
2329
2330   /* Prepare the sentinel iterator state, and cache it.  When we bump
2331      into it, scanning backwards, we'll know that the last non-base
2332      level is exhausted.  This is done only when the cache holds no
2333      states beyond bidi_cache_start.  */
2333   if (bidi_cache_idx == bidi_cache_start)
2334     {
2335       bidi_copy_it (&sentinel, bidi_it);
2336       if (bidi_it->first_elt)
2337         {
2338           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2339           sentinel.bytepos--;
2340           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2341           sentinel.ch_len = 1;
2342           sentinel.nchars = 1;
2343         }
2344       bidi_cache_iterator_state (&sentinel, 1);
2345     }
2346
     /* OLD_LEVEL is the resolved level BIDI_IT holds before the call
        below; NEW_LEVEL is what bidi_level_of_next_char returns.  */
2347   old_level = bidi_it->resolved_level;
2348   new_level = bidi_level_of_next_char (bidi_it);
2349
2350   /* Reordering of resolved levels (clause L2) is implemented by
2351      jumping to the other edge of the level and flipping direction of
2352      scanning the text whenever we find a level change.  */
2353   if (new_level != old_level)
2354     {
         /* When the level goes up, the edge of level OLD_LEVEL + 1 is
            searched with a zero end_flag; when it goes down, the edge
            of OLD_LEVEL is searched with a non-zero end_flag.  INCR is
            the step by which the expected level changes per iteration
            of the loop below.  */
2355       int ascending = new_level > old_level;
2356       int level_to_search = ascending ? old_level + 1 : old_level;
2357       int incr = ascending ? 1 : -1;
2358       int expected_next_level = old_level + incr;
2359
2360       /* Jump (or walk) to the other edge of this level.  */
2361       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2362       /* Switch scan direction and peek at the next character in the
2363          new direction.  */
2364       bidi_it->scan_dir = -bidi_it->scan_dir;
2365
2366       /* The following loop handles the case where the resolved level
2367          jumps by more than one.  This is typical for numbers inside a
2368          run of text with left-to-right embedding direction, but can
2369          also happen in other situations.  In those cases the decision
2370          where to continue after a level change, and in what direction,
2371          is tricky.  For example, given a text like below:
2372
2373                   abcdefgh
2374                   11336622
2375
2376          (where the numbers below the text show the resolved levels),
2377          the result of reordering according to UAX#9 should be this:
2378
2379                   efdcghba
2380
2381          This is implemented by the loop below which flips direction
2382          and jumps to the other edge of the level each time it finds
2383          the new level not to be the expected one.  The expected level
2384          is always one more or one less than the previous one.  */
2385       next_level = bidi_peek_at_next_level (bidi_it);
2386       while (next_level != expected_next_level)
2387         {
2388           expected_next_level += incr;
2389           level_to_search += incr;
2390           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2391           bidi_it->scan_dir = -bidi_it->scan_dir;
2392           next_level = bidi_peek_at_next_level (bidi_it);
2393         }
2394
2395       /* Finally, deliver the next character in the new direction.
            NEXT_LEVEL is not read again after this assignment.  */
2396       next_level = bidi_level_of_next_char (bidi_it);
2397     }
2398
2399   /* Take note when we have just processed the newline that precedes
2400      the end of the paragraph.  The next time we are about to be
2401      called, set_iterator_to_next will automatically reinit the
2402      paragraph direction, if needed.  We do this at the newline before
2403      the paragraph separator, because the next character might not be
2404      the first character of the next paragraph, due to the bidi
2405      reordering, whereas we _must_ know the paragraph base direction
2406      _before_ we process the paragraph's text, since the base
2407      direction affects the reordering.  */
2408   if (bidi_it->scan_dir == 1
2409       && (bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB))
2410     {
2411       /* The paragraph direction of the entire string, once
2412          determined, is in effect for the entire string.  Setting the
2413          separator limit to the end of the string prevents
2414          bidi_paragraph_init from being called automatically on this
2415          string.  */
2416       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2417         bidi_it->separator_limit = bidi_it->string.schars;
2418       else if (bidi_it->bytepos < ZV_BYTE)
2419         {
2420           ptrdiff_t sep_len
2421             = bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2422                                      bidi_it->bytepos + bidi_it->ch_len);
             /* Sanity check; note that nchars has already been used in
                the call above by the time it is tested.  */
2423           if (bidi_it->nchars <= 0)
2424             abort ();
             /* A negative SEP_LEN leaves new_paragraph and
                separator_limit untouched.  */
2425           if (sep_len >= 0)
2426             {
2427               bidi_it->new_paragraph = 1;
2428               /* Record the buffer position of the last character of the
2429                  paragraph separator.  */
2430               bidi_it->separator_limit
2431                 = bidi_it->charpos + bidi_it->nchars + sep_len;
2432             }
2433         }
2434     }
2435
2436   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2437     {
2438       /* If we are at paragraph's base embedding level and beyond the
2439          last cached position, the cache's job is done and we can
2440          discard it.  */
2441       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2442           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2443                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2444         bidi_cache_reset ();
2445         /* But as long as we are caching during forward scan, we must
2446            cache each state, or else the cache integrity will be
2447            compromised: it assumes cached states correspond to buffer
2448            positions 1:1.  */
2449       else
2450         bidi_cache_iterator_state (bidi_it, 1);
2451     }
2452
     /* Pairs with the GCPRO1 near the top; both are guarded by the same
        test on the Lisp string.  */
2453   if (STRINGP (bidi_it->string.lstring))
2454     UNGCPRO;
2455 }
2456
2457 /* Debugging aid: print the cached iterator states to stderr.  It is
2458    meant to be called by hand from within the debugger.  */
2459 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2460 void
2461 bidi_dump_cached_states (void)
2462 {
2463   ptrdiff_t k, rest;
2464   int width = 1;
2465
2466   if (bidi_cache_idx == 0)
2467     {
2468       fputs ("The cache is empty.\n", stderr);
2469       return;
2470     }
2471   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2472            bidi_cache_idx, bidi_cache_idx != 1 ? "s" : "");
2473
2474   /* Column width: one, plus one per decimal digit of the character
2475      position stored in the last cached state.  */
2476   for (rest = bidi_cache[bidi_cache_idx - 1].charpos; rest > 0; rest /= 10)
2477     width++;
2478   fputs ("ch  ", stderr);
2479   for (k = 0; k < bidi_cache_idx; k++)
2480     fprintf (stderr, "%*c", width, bidi_cache[k].ch);
2481   fputs ("\nlvl ", stderr);
2482   for (k = 0; k < bidi_cache_idx; k++)
2483     fprintf (stderr, "%*d", width, bidi_cache[k].resolved_level);
2484   fputs ("\npos ", stderr);
2485   for (k = 0; k < bidi_cache_idx; k++)
2486     fprintf (stderr, "%*"pD"d", width, bidi_cache[k].charpos);
2487   fputs ("\n", stderr);
2488 }