src/bidi.c

   1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    (UBA) as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    Two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    A few auxiliary entry points are used to initialize the bidi
  44    iterator for iterating an object (buffer or string), push and pop
  45    the bidi iterator state, and save and restore the state of the bidi
  46    cache.
  47
  48    If you want to understand the code, you will have to read it
  49    together with the relevant portions of UAX#9.  The comments include
  50    references to UAX#9 rules, for that very reason.
  51
  52    A note about references to UAX#9 rules: if the reference says
  53    something like "X9/Retaining", it means that you need to refer to
   54    rule X9 and to its modifications described in the "Implementation
  55    Notes" section of UAX#9, under "Retaining Format Codes".  */
  56
  57 #include <config.h>
  58 #include <stdio.h>
  59 #include <setjmp.h>
  60
  61 #include "lisp.h"
  62 #include "buffer.h"
  63 #include "character.h"
  64 #include "dispextern.h"
  65
  66 static int bidi_initialized = 0;
  67
  68 static Lisp_Object bidi_type_table, bidi_mirror_table;
  69
  70 #define LRM_CHAR   0x200E
  71 #define RLM_CHAR   0x200F
  72 #define BIDI_EOB   -1
  73
  74 /* Data type for describing the bidirectional character categories.  */
  75 typedef enum {
  76   UNKNOWN_BC,
  77   NEUTRAL,
  78   WEAK,
  79   STRONG
  80 } bidi_category_t;
  81
  82 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  83 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  84
  85 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  86 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  87
  88 \f
  89 /***********************************************************************
  90                         Utilities
  91  ***********************************************************************/
  92
  93 /* Return the bidi type of a character CH, subject to the current
  94    directional OVERRIDE.  */
  95 static inline bidi_type_t
  96 bidi_get_type (int ch, bidi_dir_t override)
  97 {
  98   bidi_type_t default_type;
  99
 100   if (ch == BIDI_EOB)
 101     return NEUTRAL_B;
 102   if (ch < 0 || ch > MAX_CHAR)
 103     abort ();
 104
 105   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 106
 107   if (override == NEUTRAL_DIR)
 108     return default_type;
 109
 110   switch (default_type)
 111     {
 112       /* Although UAX#9 does not tell, it doesn't make sense to
 113          override NEUTRAL_B and LRM/RLM characters.  */
 114       case NEUTRAL_B:
 115       case LRE:
 116       case LRO:
 117       case RLE:
 118       case RLO:
 119       case PDF:
 120         return default_type;
 121       default:
 122         switch (ch)
 123           {
 124             case LRM_CHAR:
 125             case RLM_CHAR:
 126               return default_type;
 127             default:
 128               if (override == L2R) /* X6 */
 129                 return STRONG_L;
 130               else if (override == R2L)
 131                 return STRONG_R;
 132               else
 133                 abort ();       /* can't happen: handled above */
 134           }
 135     }
 136 }
 137
 138 static void
 139 bidi_check_type (bidi_type_t type)
 140 {
 141   if (type < UNKNOWN_BT || type > NEUTRAL_ON)
 142     abort ();
 143 }
 144
 145 /* Given a bidi TYPE of a character, return its category.  */
 146 static inline bidi_category_t
 147 bidi_get_category (bidi_type_t type)
 148 {
 149   switch (type)
 150     {
 151       case UNKNOWN_BT:
 152         return UNKNOWN_BC;
 153       case STRONG_L:
 154       case STRONG_R:
 155       case STRONG_AL:
 156       case LRE:
 157       case LRO:
 158       case RLE:
 159       case RLO:
 160         return STRONG;
 161       case PDF:         /* ??? really?? */
 162       case WEAK_EN:
 163       case WEAK_ES:
 164       case WEAK_ET:
 165       case WEAK_AN:
 166       case WEAK_CS:
 167       case WEAK_NSM:
 168       case WEAK_BN:
 169         return WEAK;
 170       case NEUTRAL_B:
 171       case NEUTRAL_S:
 172       case NEUTRAL_WS:
 173       case NEUTRAL_ON:
 174         return NEUTRAL;
 175       default:
 176         abort ();
 177     }
 178 }
 179
 180 /* Return the mirrored character of C, if it has one.  If C has no
 181    mirrored counterpart, return C.
 182    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 183    context must be tested by the caller.  */
 184 int
 185 bidi_mirror_char (int c)
 186 {
 187   Lisp_Object val;
 188
 189   if (c == BIDI_EOB)
 190     return c;
 191   if (c < 0 || c > MAX_CHAR)
 192     abort ();
 193
 194   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 195   if (INTEGERP (val))
 196     {
 197       int v = XINT (val);
 198
 199       if (v < 0 || v > MAX_CHAR)
 200         abort ();
 201
 202       return v;
 203     }
 204
 205   return c;
 206 }
 207
 208 /* Determine the start-of-run (sor) directional type given the two
 209    embedding levels on either side of the run boundary.  Also, update
 210    the saved info about previously seen characters, since that info is
 211    generally valid for a single level run.  */
 212 static inline void
 213 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 214 {
 215   int higher_level = level_before > level_after ? level_before : level_after;
 216
 217   /* The prev_was_pdf gork is required for when we have several PDFs
 218      in a row.  In that case, we want to compute the sor type for the
 219      next level run only once: when we see the first PDF.  That's
 220      because the sor type depends only on the higher of the two levels
 221      that we find on the two sides of the level boundary (see UAX#9,
 222      clause X10), and so we don't need to know the final embedding
 223      level to which we descend after processing all the PDFs.  */
 224   if (!bidi_it->prev_was_pdf || level_before < level_after)
 225     /* FIXME: should the default sor direction be user selectable?  */
 226     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 227   if (level_before > level_after)
 228     bidi_it->prev_was_pdf = 1;
 229
 230   bidi_it->prev.type = UNKNOWN_BT;
 231   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 232     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 233   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 234   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 235   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 236   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 237     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 238   bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
 239 }
 240
 241 /* Push the current embedding level and override status; reset the
 242    current level to LEVEL and the current override status to OVERRIDE.  */
 243 static inline void
 244 bidi_push_embedding_level (struct bidi_it *bidi_it,
 245                            int level, bidi_dir_t override)
 246 {
 247   bidi_it->stack_idx++;
 248   xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
 249   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 250   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 251 }
 252
 253 /* Pop the embedding level and directional override status from the
 254    stack, and return the new level.  */
 255 static inline int
 256 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 257 {
 258   /* UAX#9 says to ignore invalid PDFs.  */
 259   if (bidi_it->stack_idx > 0)
 260     bidi_it->stack_idx--;
 261   return bidi_it->level_stack[bidi_it->stack_idx].level;
 262 }
 263
 264 /* Record in SAVED_INFO the information about the current character.  */
 265 static inline void
 266 bidi_remember_char (struct bidi_saved_info *saved_info,
 267                     struct bidi_it *bidi_it)
 268 {
 269   saved_info->charpos = bidi_it->charpos;
 270   saved_info->bytepos = bidi_it->bytepos;
 271   saved_info->type = bidi_it->type;
 272   bidi_check_type (bidi_it->type);
 273   saved_info->type_after_w1 = bidi_it->type_after_w1;
 274   bidi_check_type (bidi_it->type_after_w1);
 275   saved_info->orig_type = bidi_it->orig_type;
 276   bidi_check_type (bidi_it->orig_type);
 277 }
 278
 279 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 280    copies the part of the level stack that is actually in use.  */
 281 static inline void
 282 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 283 {
 284   int i;
 285
 286   /* Copy everything except the level stack and beyond.  */
 287   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 288
 289   /* Copy the active part of the level stack.  */
 290   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 291   for (i = 1; i <= from->stack_idx; i++)
 292     to->level_stack[i] = from->level_stack[i];
 293 }
 294
 295 \f
 296 /***********************************************************************
 297                         Caching the bidi iterator states
 298  ***********************************************************************/
 299
 300 #define BIDI_CACHE_CHUNK 200
 301 static struct bidi_it *bidi_cache;
 302 static ptrdiff_t bidi_cache_size = 0;
 303 enum { elsz = sizeof (struct bidi_it) };
 304 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 305 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 306 static ptrdiff_t bidi_cache_start = 0;  /* start of cache for this
 307                                            "stack" level */
 308
 309 /* Reset the cache state to the empty state.  We only reset the part
 310    of the cache relevant to iteration of the current object.  Previous
 311    objects, which are pushed on the display iterator's stack, are left
 312    intact.  This is called when the cached information is no more
 313    useful for the current iteration, e.g. when we were reseated to a
 314    new position on the same object.  */
 315 static inline void
 316 bidi_cache_reset (void)
 317 {
 318   bidi_cache_idx = bidi_cache_start;
 319   bidi_cache_last_idx = -1;
 320 }
 321
 322 /* Shrink the cache to its minimal size.  Called when we init the bidi
 323    iterator for reordering a buffer or a string that does not come
 324    from display properties, because that means all the previously
 325    cached info is of no further use.  */
 326 static inline void
 327 bidi_cache_shrink (void)
 328 {
 329   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 330     {
 331       bidi_cache_size = BIDI_CACHE_CHUNK;
 332       bidi_cache =
 333         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 334     }
 335   bidi_cache_reset ();
 336 }
 337
 338 static inline void
 339 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 340 {
 341   int current_scan_dir = bidi_it->scan_dir;
 342
 343   if (idx < bidi_cache_start || idx >= bidi_cache_idx)
 344     abort ();
 345
 346   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 347   bidi_it->scan_dir = current_scan_dir;
 348   bidi_cache_last_idx = idx;
 349 }
 350
 351 /* Find a cached state with a given CHARPOS and resolved embedding
 352    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 353    resolved levels in cached states.  DIR, if non-zero, means search
 354    in that direction from the last cache hit.  */
 355 static inline ptrdiff_t
 356 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 357 {
 358   ptrdiff_t i, i_start;
 359
 360   if (bidi_cache_idx > bidi_cache_start)
 361     {
 362       if (bidi_cache_last_idx == -1)
 363         bidi_cache_last_idx = bidi_cache_idx - 1;
 364       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 365         {
 366           dir = -1;
 367           i_start = bidi_cache_last_idx - 1;
 368         }
 369       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 370                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 371         {
 372           dir = 1;
 373           i_start = bidi_cache_last_idx + 1;
 374         }
 375       else if (dir)
 376         i_start = bidi_cache_last_idx;
 377       else
 378         {
 379           dir = -1;
 380           i_start = bidi_cache_idx - 1;
 381         }
 382
 383       if (dir < 0)
 384         {
 385           /* Linear search for now; FIXME!  */
 386           for (i = i_start; i >= bidi_cache_start; i--)
 387             if (bidi_cache[i].charpos <= charpos
 388                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 389                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 390               return i;
 391         }
 392       else
 393         {
 394           for (i = i_start; i < bidi_cache_idx; i++)
 395             if (bidi_cache[i].charpos <= charpos
 396                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 397                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 398               return i;
 399         }
 400     }
 401
 402   return -1;
 403 }
 404
 405 /* Find a cached state where the resolved level changes to a value
 406    that is lower than LEVEL, and return its cache slot index.  DIR is
 407    the direction to search, starting with the last used cache slot.
 408    If DIR is zero, we search backwards from the last occupied cache
 409    slot.  BEFORE, if non-zero, means return the index of the slot that
 410    is ``before'' the level change in the search direction.  That is,
 411    given the cached levels like this:
 412
 413          1122333442211
 414           AB        C
 415
 416    and assuming we are at the position cached at the slot marked with
 417    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 418    index of slot B or A, depending whether BEFORE is, respectively,
 419    non-zero or zero.  */
 420 static ptrdiff_t
 421 bidi_cache_find_level_change (int level, int dir, int before)
 422 {
 423   if (bidi_cache_idx)
 424     {
 425       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 426       int incr = before ? 1 : 0;
 427
 428       xassert (!dir || bidi_cache_last_idx >= 0);
 429
 430       if (!dir)
 431         dir = -1;
 432       else if (!incr)
 433         i += dir;
 434
 435       if (dir < 0)
 436         {
 437           while (i >= bidi_cache_start + incr)
 438             {
 439               if (bidi_cache[i - incr].resolved_level >= 0
 440                   && bidi_cache[i - incr].resolved_level < level)
 441                 return i;
 442               i--;
 443             }
 444         }
 445       else
 446         {
 447           while (i < bidi_cache_idx - incr)
 448             {
 449               if (bidi_cache[i + incr].resolved_level >= 0
 450                   && bidi_cache[i + incr].resolved_level < level)
 451                 return i;
 452               i++;
 453             }
 454         }
 455     }
 456
 457   return -1;
 458 }
 459
 460 static inline void
 461 bidi_cache_ensure_space (ptrdiff_t idx)
 462 {
 463   /* Enlarge the cache as needed.  */
 464   if (idx >= bidi_cache_size)
 465     {
 466       ptrdiff_t new_size;
 467
 468       /* The bidi cache cannot be larger than the largest Lisp string
 469          or buffer.  */
 470       ptrdiff_t string_or_buffer_bound =
 471         max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
 472
 473       /* Also, it cannot be larger than what C can represent.  */
 474       ptrdiff_t c_bound = min (PTRDIFF_MAX, SIZE_MAX) / elsz;
 475
 476       if (min (string_or_buffer_bound, c_bound) <= idx)
 477         memory_full (SIZE_MAX);
 478       new_size = idx - idx % BIDI_CACHE_CHUNK + BIDI_CACHE_CHUNK;
 479       bidi_cache = (struct bidi_it *) xrealloc (bidi_cache, new_size * elsz);
 480       bidi_cache_size = new_size;
 481     }
 482 }
 483
 484 static inline void
 485 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 486 {
 487   ptrdiff_t idx;
 488
 489   /* We should never cache on backward scans.  */
 490   if (bidi_it->scan_dir == -1)
 491     abort ();
 492   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 493
 494   if (idx < 0)
 495     {
 496       idx = bidi_cache_idx;
 497       bidi_cache_ensure_space (idx);
 498       /* Character positions should correspond to cache positions 1:1.
 499          If we are outside the range of cached positions, the cache is
 500          useless and must be reset.  */
 501       if (idx > bidi_cache_start &&
 502           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 503                                + bidi_cache[idx - 1].nchars)
 504            || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
 505         {
 506           bidi_cache_reset ();
 507           idx = bidi_cache_start;
 508         }
 509       if (bidi_it->nchars <= 0)
 510         abort ();
 511       bidi_copy_it (&bidi_cache[idx], bidi_it);
 512       if (!resolved)
 513         bidi_cache[idx].resolved_level = -1;
 514     }
 515   else
 516     {
 517       /* Copy only the members which could have changed, to avoid
 518          costly copying of the entire struct.  */
 519       bidi_cache[idx].type = bidi_it->type;
 520       bidi_check_type (bidi_it->type);
 521       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 522       bidi_check_type (bidi_it->type_after_w1);
 523       if (resolved)
 524         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 525       else
 526         bidi_cache[idx].resolved_level = -1;
 527       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 528       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 529       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 530       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 531       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 532     }
 533
 534   bidi_cache_last_idx = idx;
 535   if (idx >= bidi_cache_idx)
 536     bidi_cache_idx = idx + 1;
 537 }
 538
 539 static inline bidi_type_t
 540 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 541 {
 542   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 543
 544   if (i >= bidi_cache_start)
 545     {
 546       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 547
 548       bidi_copy_it (bidi_it, &bidi_cache[i]);
 549       bidi_cache_last_idx = i;
 550       /* Don't let scan direction from from the cached state override
 551          the current scan direction.  */
 552       bidi_it->scan_dir = current_scan_dir;
 553       return bidi_it->type;
 554     }
 555
 556   return UNKNOWN_BT;
 557 }
 558
 559 static inline int
 560 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 561 {
 562   if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
 563     abort ();
 564   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 565 }
 566
 567 \f
 568 /***********************************************************************
 569              Pushing and popping the bidi iterator state
 570  ***********************************************************************/
 571 /* 5-slot stack for saving the start of the previous level of the
 572    cache.  xdisp.c maintains a 5-slot stack for its iterator state,
 573    and we need the same size of our stack.  */
 574 static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
 575 static int bidi_cache_sp;
 576
 577 /* Push the bidi iterator state in preparation for reordering a
 578    different object, e.g. display string found at certain buffer
 579    position.  Pushing the bidi iterator boils down to saving its
 580    entire state on the cache and starting a new cache "stacked" on top
 581    of the current cache.  */
 582 void
 583 bidi_push_it (struct bidi_it *bidi_it)
 584 {
 585   /* Save the current iterator state in its entirety after the last
 586      used cache slot.  */
 587   bidi_cache_ensure_space (bidi_cache_idx);
 588   memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
 589
 590   /* Push the current cache start onto the stack.  */
 591   xassert (bidi_cache_sp < IT_STACK_SIZE);
 592   bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
 593
 594   /* Start a new level of cache, and make it empty.  */
 595   bidi_cache_start = bidi_cache_idx;
 596   bidi_cache_last_idx = -1;
 597 }
 598
 599 /* Restore the iterator state saved by bidi_push_it and return the
 600    cache to the corresponding state.  */
 601 void
 602 bidi_pop_it (struct bidi_it *bidi_it)
 603 {
 604   if (bidi_cache_start <= 0)
 605     abort ();
 606
 607   /* Reset the next free cache slot index to what it was before the
 608      call to bidi_push_it.  */
 609   bidi_cache_idx = bidi_cache_start - 1;
 610
 611   /* Restore the bidi iterator state saved in the cache.  */
 612   memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
 613
 614   /* Pop the previous cache start from the stack.  */
 615   if (bidi_cache_sp <= 0)
 616     abort ();
 617   bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
 618
 619   /* Invalidate the last-used cache slot data.  */
 620   bidi_cache_last_idx = -1;
 621 }
 622
 623 static ptrdiff_t bidi_cache_total_alloc;
 624
 625 /* Stash away a copy of the cache and its control variables.  */
 626 void *
 627 bidi_shelve_cache (void)
 628 {
 629   unsigned char *databuf;
 630
 631   /* Empty cache.  */
 632   if (bidi_cache_idx == 0)
 633     return NULL;
 634
 635   databuf = xmalloc (sizeof (bidi_cache_idx)
 636                      + bidi_cache_idx * sizeof (struct bidi_it)
 637                      + sizeof (bidi_cache_start_stack)
 638                      + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 639                      + sizeof (bidi_cache_last_idx));
 640   bidi_cache_total_alloc +=
 641     sizeof (bidi_cache_idx) + bidi_cache_idx * sizeof (struct bidi_it)
 642     + sizeof (bidi_cache_start_stack)
 643     + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
 644     + sizeof (bidi_cache_last_idx);
 645
 646   memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
 647   memcpy (databuf + sizeof (bidi_cache_idx),
 648           bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
 649   memcpy (databuf + sizeof (bidi_cache_idx)
 650           + bidi_cache_idx * sizeof (struct bidi_it),
 651           bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
 652   memcpy (databuf + sizeof (bidi_cache_idx)
 653           + bidi_cache_idx * sizeof (struct bidi_it)
 654           + sizeof (bidi_cache_start_stack),
 655           &bidi_cache_sp, sizeof (bidi_cache_sp));
 656   memcpy (databuf + sizeof (bidi_cache_idx)
 657           + bidi_cache_idx * sizeof (struct bidi_it)
 658           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 659           &bidi_cache_start, sizeof (bidi_cache_start));
 660   memcpy (databuf + sizeof (bidi_cache_idx)
 661           + bidi_cache_idx * sizeof (struct bidi_it)
 662           + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 663           + sizeof (bidi_cache_start),
 664           &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
 665
 666   return databuf;
 667 }
 668
 669 /* Restore the cache state from a copy stashed away by
 670    bidi_shelve_cache, and free the buffer used to stash that copy.
 671    JUST_FREE non-zero means free the buffer, but don't restore the
 672    cache; used when the corresponding iterator is discarded instead of
 673    being restored.  */
 674 void
 675 bidi_unshelve_cache (void *databuf, int just_free)
 676 {
 677   unsigned char *p = databuf;
 678
 679   if (!p)
 680     {
 681       if (!just_free)
 682         {
 683           /* A NULL pointer means an empty cache.  */
 684           bidi_cache_start = 0;
 685           bidi_cache_sp = 0;
 686           bidi_cache_reset ();
 687         }
 688     }
 689   else
 690     {
 691       if (just_free)
 692         {
 693           ptrdiff_t idx;
 694
 695           memcpy (&idx, p, sizeof (bidi_cache_idx));
 696           bidi_cache_total_alloc -=
 697             sizeof (bidi_cache_idx) + idx * sizeof (struct bidi_it)
 698             + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 699             + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx);
 700         }
 701       else
 702         {
 703           memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
 704           bidi_cache_ensure_space (bidi_cache_idx);
 705           memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
 706                   bidi_cache_idx * sizeof (struct bidi_it));
 707           memcpy (bidi_cache_start_stack,
 708                   p + sizeof (bidi_cache_idx)
 709                   + bidi_cache_idx * sizeof (struct bidi_it),
 710                   sizeof (bidi_cache_start_stack));
 711           memcpy (&bidi_cache_sp,
 712                   p + sizeof (bidi_cache_idx)
 713                   + bidi_cache_idx * sizeof (struct bidi_it)
 714                   + sizeof (bidi_cache_start_stack),
 715                   sizeof (bidi_cache_sp));
 716           memcpy (&bidi_cache_start,
 717                   p + sizeof (bidi_cache_idx)
 718                   + bidi_cache_idx * sizeof (struct bidi_it)
 719                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
 720                   sizeof (bidi_cache_start));
 721           memcpy (&bidi_cache_last_idx,
 722                   p + sizeof (bidi_cache_idx)
 723                   + bidi_cache_idx * sizeof (struct bidi_it)
 724                   + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 725                   + sizeof (bidi_cache_start),
 726                   sizeof (bidi_cache_last_idx));
 727           bidi_cache_total_alloc -=
 728             sizeof (bidi_cache_idx) + bidi_cache_idx * sizeof (struct bidi_it)
 729             + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
 730             + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx);
 731         }
 732
 733       xfree (p);
 734     }
 735 }
 736
 737 \f
 738 /***********************************************************************
 739                         Initialization
 740  ***********************************************************************/
 741 static void
 742 bidi_initialize (void)
 743 {
 744
 745 #include "biditype.h"
 746 #include "bidimirror.h"
 747
 748   int i;
 749
 750   bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
 751   staticpro (&bidi_type_table);
 752
 753   for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
 754     char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
 755                           make_number (bidi_type[i].type));
 756
 757   bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
 758   staticpro (&bidi_mirror_table);
 759
 760   for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
 761     char_table_set (bidi_mirror_table, bidi_mirror[i].from,
 762                     make_number (bidi_mirror[i].to));
 763
 764   Qparagraph_start = intern ("paragraph-start");
 765   staticpro (&Qparagraph_start);
 766   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 767   if (!STRINGP (paragraph_start_re))
 768     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 769   staticpro (&paragraph_start_re);
 770   Qparagraph_separate = intern ("paragraph-separate");
 771   staticpro (&Qparagraph_separate);
 772   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 773   if (!STRINGP (paragraph_separate_re))
 774     paragraph_separate_re = build_string ("[ \t\f]*$");
 775   staticpro (&paragraph_separate_re);
 776
 777   bidi_cache_sp = 0;
 778   bidi_cache_total_alloc = 0;
 779
 780   bidi_initialized = 1;
 781 }
 782
 783 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 784    end.  */
 785 static inline void
 786 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 787 {
 788   bidi_it->invalid_levels = 0;
 789   bidi_it->invalid_rl_levels = -1;
 790   bidi_it->stack_idx = 0;
 791   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 792 }
 793
 794 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 795 void
 796 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 797               struct bidi_it *bidi_it)
 798 {
 799   if (! bidi_initialized)
 800     bidi_initialize ();
 801   if (charpos >= 0)
 802     bidi_it->charpos = charpos;
 803   if (bytepos >= 0)
 804     bidi_it->bytepos = bytepos;
 805   bidi_it->frame_window_p = frame_window_p;
 806   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 807   bidi_it->first_elt = 1;
 808   bidi_set_paragraph_end (bidi_it);
 809   bidi_it->new_paragraph = 1;
 810   bidi_it->separator_limit = -1;
 811   bidi_it->type = NEUTRAL_B;
 812   bidi_it->type_after_w1 = NEUTRAL_B;
 813   bidi_it->orig_type = NEUTRAL_B;
 814   bidi_it->prev_was_pdf = 0;
 815   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 816     bidi_it->prev.orig_type = UNKNOWN_BT;
 817   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 818     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 819   bidi_it->next_for_neutral.charpos = -1;
 820   bidi_it->next_for_neutral.type =
 821     bidi_it->next_for_neutral.type_after_w1 =
 822     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 823   bidi_it->prev_for_neutral.charpos = -1;
 824   bidi_it->prev_for_neutral.type =
 825     bidi_it->prev_for_neutral.type_after_w1 =
 826     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 827   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 828   bidi_it->disp_pos = -1;       /* invalid/unknown */
 829   bidi_it->disp_prop_p = 0;
 830   /* We can only shrink the cache if we are at the bottom level of its
 831      "stack".  */
 832   if (bidi_cache_start == 0)
 833     bidi_cache_shrink ();
 834   else
 835     bidi_cache_reset ();
 836 }
 837
 838 /* Perform initializations for reordering a new line of bidi text.  */
 839 static void
 840 bidi_line_init (struct bidi_it *bidi_it)
 841 {
 842   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 843   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 844   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 845   bidi_it->invalid_levels = 0;
 846   bidi_it->invalid_rl_levels = -1;
 847   bidi_it->next_en_pos = -1;
 848   bidi_it->next_for_ws.type = UNKNOWN_BT;
 849   bidi_set_sor_type (bidi_it,
 850                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 851                      bidi_it->level_stack[0].level); /* X10 */
 852
 853   bidi_cache_reset ();
 854 }
 855
 856 \f
 857 /***********************************************************************
 858                         Fetching characters
 859  ***********************************************************************/
 860
 861 /* Count bytes in string S between BEG/BEGBYTE and END.  BEG and END
 862    are zero-based character positions in S, BEGBYTE is byte position
 863    corresponding to BEG.  UNIBYTE, if non-zero, means S is a unibyte
 864    string.  */
 865 static inline EMACS_INT
 866 bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
 867                   const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
 868 {
 869   EMACS_INT pos = beg;
 870   const unsigned char *p = s + begbyte, *start = p;
 871
 872   if (unibyte)
 873     p = s + end;
 874   else
 875     {
 876       if (!CHAR_HEAD_P (*p))
 877         abort ();
 878
 879       while (pos < end)
 880         {
 881           p += BYTES_BY_CHAR_HEAD (*p);
 882           pos++;
 883         }
 884     }
 885
 886   return p - start;
 887 }
 888
 889 /* Fetch and return the character at byte position BYTEPOS.  If S is
 890    non-NULL, fetch the character from string S; otherwise fetch the
 891    character from the current buffer.  UNIBYTE non-zero means S is a
 892    unibyte string.  */
 893 static inline int
 894 bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
 895 {
 896   if (s)
 897     {
 898       if (unibyte)
 899         return s[bytepos];
 900       else
 901         return STRING_CHAR (s + bytepos);
 902     }
 903   else
 904     return FETCH_MULTIBYTE_CHAR (bytepos);
 905 }
 906
 907 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 908    character is covered by a display string, treat the entire run of
 909    covered characters as a single character u+FFFC, and return their
 910    combined length in CH_LEN and NCHARS.  DISP_POS specifies the
 911    character position of the next display string, or -1 if not yet
 912    computed.  DISP_PROP_P non-zero means that there's really a display
 913    string at DISP_POS, as opposed to when we searched till DISP_POS
 914    without finding one.  When the next character is at or beyond that
 915    position, the function updates DISP_POS with the position of the
 916    next display string.  STRING->s is the C string to iterate, or NULL
 917    if iterating over a buffer or a Lisp string; in the latter case,
 918    STRING->lstring is the Lisp string.  */
 919 static inline int
 920 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 921                  int *disp_prop_p, struct bidi_string_data *string,
 922                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 923 {
 924   int ch;
 925   EMACS_INT endpos =
 926     (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
 927   struct text_pos pos;
 928
 929   /* If we got past the last known position of display string, compute
 930      the position of the next one.  That position could be at CHARPOS.  */
 931   if (charpos < endpos && charpos > *disp_pos)
 932     {
 933       SET_TEXT_POS (pos, charpos, bytepos);
 934       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
 935                                               disp_prop_p);
 936     }
 937
 938   /* Fetch the character at BYTEPOS.  */
 939   if (charpos >= endpos)
 940     {
 941       ch = BIDI_EOB;
 942       *ch_len = 1;
 943       *nchars = 1;
 944       *disp_pos = endpos;
 945       *disp_prop_p = 0;
 946     }
 947   else if (charpos >= *disp_pos && *disp_prop_p)
 948     {
 949       EMACS_INT disp_end_pos;
 950
 951       /* We don't expect to find ourselves in the middle of a display
 952          property.  Hopefully, it will never be needed.  */
 953       if (charpos > *disp_pos)
 954         abort ();
 955       /* Return the Unicode Object Replacement Character to represent
 956          the entire run of characters covered by the display string.  */
 957       ch = 0xFFFC;
 958       disp_end_pos = compute_display_string_end (*disp_pos, string);
 959       *nchars = disp_end_pos - *disp_pos;
 960       if (*nchars <= 0)
 961         abort ();
 962       if (string->s)
 963         *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
 964                                     disp_end_pos, string->unibyte);
 965       else if (STRINGP (string->lstring))
 966         *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
 967                                     bytepos, disp_end_pos, string->unibyte);
 968       else
 969         *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 970     }
 971   else
 972     {
 973       if (string->s)
 974         {
 975           int len;
 976
 977           if (!string->unibyte)
 978             {
 979               ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
 980               *ch_len = len;
 981             }
 982           else
 983             {
 984               ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
 985               *ch_len = 1;
 986             }
 987         }
 988       else if (STRINGP (string->lstring))
 989         {
 990           int len;
 991
 992           if (!string->unibyte)
 993             {
 994               ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
 995                                            len);
 996               *ch_len = len;
 997             }
 998           else
 999             {
1000               ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1001               *ch_len = 1;
1002             }
1003         }
1004       else
1005         {
1006           ch = FETCH_MULTIBYTE_CHAR (bytepos);
1007           *ch_len = CHAR_BYTES (ch);
1008         }
1009       *nchars = 1;
1010     }
1011
1012   /* If we just entered a run of characters covered by a display
1013      string, compute the position of the next display string.  */
1014   if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1015       && *disp_prop_p)
1016     {
1017       SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
1018       *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1019                                               disp_prop_p);
1020     }
1021
1022   return ch;
1023 }
1024
1025 \f
1026 /***********************************************************************
1027                         Determining paragraph direction
1028  ***********************************************************************/
1029
1030 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1031    Value is the non-negative length of the paragraph separator
1032    following the buffer position, -1 if position is at the beginning
1033    of a new paragraph, or -2 if position is neither at beginning nor
1034    at end of a paragraph.  */
1035 static EMACS_INT
1036 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1037 {
1038   Lisp_Object sep_re;
1039   Lisp_Object start_re;
1040   EMACS_INT val;
1041
1042   sep_re = paragraph_separate_re;
1043   start_re = paragraph_start_re;
1044
1045   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1046   if (val < 0)
1047     {
1048       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1049         val = -1;
1050       else
1051         val = -2;
1052     }
1053
1054   return val;
1055 }
1056
1057 /* Find the beginning of this paragraph by looking back in the buffer.
1058    Value is the byte position of the paragraph's beginning.  */
1059 static EMACS_INT
1060 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
1061 {
1062   Lisp_Object re = paragraph_start_re;
1063   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1064
1065   while (pos_byte > BEGV_BYTE
1066          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1067     {
1068       /* FIXME: What if the paragraph beginning is covered by a
1069          display string?  And what if a display string covering some
1070          of the text over which we scan back includes
1071          paragraph_start_re?  */
1072       pos = find_next_newline_no_quit (pos - 1, -1);
1073       pos_byte = CHAR_TO_BYTE (pos);
1074     }
1075   return pos_byte;
1076 }
1077
1078 /* Determine the base direction, a.k.a. base embedding level, of the
1079    paragraph we are about to iterate through.  If DIR is either L2R or
1080    R2L, just use that.  Otherwise, determine the paragraph direction
1081    from the first strong directional character of the paragraph.
1082
1083    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1084    has no strong directional characters and both DIR and
1085    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
1086    in the buffer until a paragraph is found with a strong character,
1087    or until hitting BEGV.  In the latter case, fall back to L2R.  This
1088    flag is used in current-bidi-paragraph-direction.
1089
1090    Note that this function gives the paragraph separator the same
1091    direction as the preceding paragraph, even though Emacs generally
 1092    views the separator as not belonging to any paragraph.  */
 1093 void
 1094 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 1095 {
 1096   EMACS_INT bytepos = bidi_it->bytepos;
        /* Non-zero when iterating over a C string or a Lisp string rather
           than over the current buffer.  */
 1097   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
 1098   EMACS_INT pstartbyte;
 1099   /* Note that begbyte is a byte position, while end is a character
 1100      position.  Yes, this is ugly, but we are trying to avoid costly
 1101      calls to BYTE_TO_CHAR and its ilk.  */
 1102   EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
 1103   EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
 1104
 1105   /* Special case for an empty buffer. */
 1106   if (bytepos == begbyte && bidi_it->charpos == end)
 1107     dir = L2R;
 1108   /* We should never be called at EOB or before BEGV.  */
 1109   else if (bidi_it->charpos >= end || bytepos < begbyte)
 1110     abort ();
 1111
        /* An explicit L2R or R2L request is recorded as is, and the
           new_paragraph flag is cleared.  */
 1112   if (dir == L2R)
 1113     {
 1114       bidi_it->paragraph_dir = L2R;
 1115       bidi_it->new_paragraph = 0;
 1116     }
 1117   else if (dir == R2L)
 1118     {
 1119       bidi_it->paragraph_dir = R2L;
 1120       bidi_it->new_paragraph = 0;
 1121     }
 1122   else if (dir == NEUTRAL_DIR)  /* P2 */
 1123     {
 1124       int ch;
 1125       EMACS_INT ch_len, nchars;
 1126       EMACS_INT pos, disp_pos = -1;
 1127       int disp_prop_p = 0;
 1128       bidi_type_t type;
 1129       const unsigned char *s;
 1130
 1131       if (!bidi_initialized)
 1132         bidi_initialize ();
 1133
 1134       /* If we are inside a paragraph separator, we are just waiting
 1135          for the separator to be exhausted; use the previous paragraph
 1136          direction.  But don't do that if we have been just reseated,
 1137          because we need to reinitialize below in that case.  */
 1138       if (!bidi_it->first_elt
 1139           && bidi_it->charpos < bidi_it->separator_limit)
 1140         return;
 1141
 1142       /* If we are on a newline, get past it to where the next
 1143          paragraph might start.  But don't do that at BEGV since then
 1144          we are potentially in a new paragraph that doesn't yet
 1145          exist.  */
 1146       pos = bidi_it->charpos;
 1147       s = STRINGP (bidi_it->string.lstring) ?
 1148         SDATA (bidi_it->string.lstring) : bidi_it->string.s;
 1149       if (bytepos > begbyte
 1150           && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
 1151         {
 1152           bytepos++;
 1153           pos++;
 1154         }
 1155
 1156       /* We are either at the beginning of a paragraph or in the
 1157          middle of it.  Find where this paragraph starts.  */
 1158       if (string_p)
 1159         {
 1160           /* We don't support changes of paragraph direction inside a
 1161              string.  It is treated as a single paragraph.  */
 1162           pstartbyte = 0;
 1163         }
 1164       else
 1165         pstartbyte = bidi_find_paragraph_start (pos, bytepos);
 1166       bidi_it->separator_limit = -1;
 1167       bidi_it->new_paragraph = 0;
 1168
 1169       /* The following loop is run more than once only if NO_DEFAULT_P
 1170          is non-zero, and only if we are iterating on a buffer.  */
 1171       do {
            /* Scan forward from the paragraph start.
               NOTE(review): for strings BYTEPOS is reset to 0 here, but
               POS is recomputed only for buffers, so it keeps the value
               derived from bidi_it->charpos above -- confirm that string
               callers only get here with the iterator at position 0.  */
 1172         bytepos = pstartbyte;
 1173         if (!string_p)
 1174           pos = BYTE_TO_CHAR (bytepos);
 1175         ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop_p,
 1176                               &bidi_it->string,
 1177                               bidi_it->frame_window_p, &ch_len, &nchars);
 1178         type = bidi_get_type (ch, NEUTRAL_DIR);
 1179
            /* Keep fetching characters while the current one is not
               strong, or is an explicit embedding/override mark that
               bidi_ignore_explicit_marks_for_paragraph_level tells us to
               skip.  POS/BYTEPOS always point past the character whose
               type is being tested.  */
 1180         for (pos += nchars, bytepos += ch_len;
 1181              /* NOTE: UAX#9 says to search only for L, AL, or R types
 1182                 of characters, and ignore RLE, RLO, LRE, and LRO.
 1183                 However, I'm not sure it makes sense to omit those 4;
 1184                 should try with and without that to see the effect.  */
 1185              (bidi_get_category (type) != STRONG)
 1186                || (bidi_ignore_explicit_marks_for_paragraph_level
 1187                    && (type == RLE || type == RLO
 1188                        || type == LRE || type == LRO));
 1189              type = bidi_get_type (ch, NEUTRAL_DIR))
 1190           {
 1191             if (pos >= end)
 1192               {
 1193                 /* Pretend there's a paragraph separator at end of
 1194                    buffer/string.  */
 1195                 type = NEUTRAL_B;
 1196                 break;
 1197               }
                /* In a buffer, a NEUTRAL_B character followed by a
                   paragraph separator or a paragraph start (return value
                   of bidi_at_paragraph_end >= -1) ends the search.  */
 1198             if (!string_p
 1199                 && type == NEUTRAL_B
 1200                 && bidi_at_paragraph_end (pos, bytepos) >= -1)
 1201               break;
 1202             /* Fetch next character and advance to get past it.  */
 1203             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
 1204                                   &disp_prop_p, &bidi_it->string,
 1205                                   bidi_it->frame_window_p, &ch_len, &nchars);
 1206             pos += nchars;
 1207             bytepos += ch_len;
 1208           }
            /* Any other TYPE leaves bidi_it->paragraph_dir unchanged.  */
 1209         if (type == STRONG_R || type == STRONG_AL) /* P3 */
 1210           bidi_it->paragraph_dir = R2L;
 1211         else if (type == STRONG_L)
 1212           bidi_it->paragraph_dir = L2R;
 1213         if (!string_p
 1214             && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
 1215           {
 1216             /* If this paragraph is at BEGV, default to L2R.  */
 1217             if (pstartbyte == BEGV_BYTE)
 1218               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
 1219             else
 1220               {
 1221                 EMACS_INT prevpbyte = pstartbyte;
 1222                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
 1223
 1224                 /* Find the beginning of the previous paragraph, if any.  */
 1225                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 1226                   {
 1227                     /* FIXME: What if p is covered by a display
 1228                        string?  See also a FIXME inside
 1229                        bidi_find_paragraph_start.  */
 1230                     p--;
 1231                     pbyte = CHAR_TO_BYTE (p);
 1232                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
 1233                   }
                    /* The next iteration of the outer loop scans the
                       paragraph that starts here.  */
 1234                 pstartbyte = prevpbyte;
 1235               }
 1236           }
 1237       } while (!string_p
 1238                && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
 1239     }
 1240   else
 1241     abort ();
 1242
 1243   /* Contrary to UAX#9 clause P3, we only default the paragraph
 1244      direction to L2R if we have no previous usable paragraph
 1245      direction.  This is allowed by the HL1 clause.  */
 1246   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
 1247     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
        /* The base embedding level is 1 for an R2L paragraph, 0 otherwise.  */
 1248   if (bidi_it->paragraph_dir == R2L)
 1249     bidi_it->level_stack[0].level = 1;
 1250   else
 1251     bidi_it->level_stack[0].level = 0;
 1252
 1253   bidi_line_init (bidi_it);
 1254 }
1255
1256 \f
1257 /***********************************************************************
1258                  Resolving explicit and implicit levels.
1259   The rest of this file constitutes the core of the UBA implementation.
1260  ***********************************************************************/
1261
1262 static inline int
1263 bidi_explicit_dir_char (int ch)
1264 {
1265   bidi_type_t ch_type;
1266
1267   if (!bidi_initialized)
1268     abort ();
1269   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1270   return (ch_type == LRE || ch_type == LRO
1271           || ch_type == RLE || ch_type == RLO
1272           || ch_type == PDF);
1273 }
1274
1275 /* A helper function for bidi_resolve_explicit.  It advances to the
1276    next character in logical order and determines the new embedding
1277    level and directional override, but does not take into account
1278    empty embeddings.  */
 1279 static int
 1280 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 1281 {
 1282   int curchar;
 1283   bidi_type_t type;
 1284   int current_level;
 1285   int new_level;
 1286   bidi_dir_t override;
        /* Non-zero when iterating over a C string or a Lisp string rather
           than over the current buffer.  */
 1287   int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
 1288
 1289   /* If reseat()'ed, don't advance, so as to start iteration from the
 1290      position where we were reseated.  bidi_it->bytepos can be less
 1291      than BEGV_BYTE after reseat to BEGV.  */
 1292   if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
 1293       || bidi_it->first_elt)
 1294     {
 1295       bidi_it->first_elt = 0;
          /* Clamp CHARPOS to the start of the text and recompute BYTEPOS
             from it.  */
 1296       if (string_p)
 1297         {
 1298           const unsigned char *p =
 1299             STRINGP (bidi_it->string.lstring)
 1300             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
 1301
 1302           if (bidi_it->charpos < 0)
 1303             bidi_it->charpos = 0;
 1304           bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
 1305                                                bidi_it->string.unibyte);
 1306         }
 1307       else
 1308         {
 1309           if (bidi_it->charpos < BEGV)
 1310             bidi_it->charpos = BEGV;
 1311           bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
 1312         }
 1313     }
 1314   /* Don't move at end of buffer/string.  */
 1315   else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
 1316     {
 1317       /* Advance to the next character, skipping characters covered by
 1318          display strings (nchars > 1).  */
 1319       if (bidi_it->nchars <= 0)
 1320         abort ();
 1321       bidi_it->charpos += bidi_it->nchars;
 1322       if (bidi_it->ch_len == 0)
 1323         abort ();
 1324       bidi_it->bytepos += bidi_it->ch_len;
 1325     }
 1326
        /* Start from the level and override on top of the embedding
           stack.  NEW_LEVEL is the return value; it differs from
           CURRENT_LEVEL only if a level is pushed or popped below.  */
 1327   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
 1328   override = bidi_it->level_stack[bidi_it->stack_idx].override;
 1329   new_level = current_level;
 1330
 1331   if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
 1332     {
          /* At or past the end of the text: report BIDI_EOB as a single
             one-byte character and record that no display string lies
             ahead.  */
 1333       curchar = BIDI_EOB;
 1334       bidi_it->ch_len = 1;
 1335       bidi_it->nchars = 1;
 1336       bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
 1337       bidi_it->disp_prop_p = 0;
 1338     }
 1339   else
 1340     {
 1341       /* Fetch the character at BYTEPOS.  If it is covered by a
 1342          display string, treat the entire run of covered characters as
 1343          a single character u+FFFC.  */
 1344       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
 1345                                  &bidi_it->disp_pos, &bidi_it->disp_prop_p,
 1346                                  &bidi_it->string, bidi_it->frame_window_p,
 1347                                  &bidi_it->ch_len, &bidi_it->nchars);
 1348     }
 1349   bidi_it->ch = curchar;
 1350
 1351   /* Don't apply directional override here, as all the types we handle
 1352      below will not be affected by the override anyway, and we need
 1353      the original type unaltered.  The override will be applied in
 1354      bidi_resolve_weak.  */
 1355   type = bidi_get_type (curchar, NEUTRAL_DIR);
 1356   bidi_it->orig_type = type;
 1357   bidi_check_type (bidi_it->orig_type);
 1358
 1359   if (type != PDF)
 1360     bidi_it->prev_was_pdf = 0;
 1361
 1362   bidi_it->type_after_w1 = UNKNOWN_BT;
 1363
        /* In each case below the explicit code itself is re-typed as
           WEAK_BN, its original type is kept in type_after_w1, and the
           level stack is touched only while ignore_bn_limit is negative.
           Otherwise the code may be re-typed as WEAK_EN, depending on the
           previous character and next_en_pos.  */
 1364   switch (type)
 1365     {
 1366       case RLE: /* X2 */
 1367       case RLO: /* X4 */
 1368         bidi_it->type_after_w1 = type;
 1369         bidi_check_type (bidi_it->type_after_w1);
 1370         type = WEAK_BN; /* X9/Retaining */
 1371         if (bidi_it->ignore_bn_limit <= -1)
 1372           {
 1373             if (current_level <= BIDI_MAXLEVEL - 4)
 1374               {
 1375                 /* Compute the least odd embedding level greater than
 1376                    the current level.  */
 1377                 new_level = ((current_level + 1) & ~1) + 1;
 1378                 if (bidi_it->type_after_w1 == RLE)
 1379                   override = NEUTRAL_DIR;
 1380                 else
 1381                   override = R2L;
 1382                 if (current_level == BIDI_MAXLEVEL - 4)
 1383                   bidi_it->invalid_rl_levels = 0;
 1384                 bidi_push_embedding_level (bidi_it, new_level, override);
 1385               }
 1386             else
 1387               {
                    /* The current level is too high to push another
                       one; only count the ignored embedding.  */
 1388                 bidi_it->invalid_levels++;
 1389                 /* See the commentary about invalid_rl_levels below.  */
 1390                 if (bidi_it->invalid_rl_levels < 0)
 1391                   bidi_it->invalid_rl_levels = 0;
 1392                 bidi_it->invalid_rl_levels++;
 1393               }
 1394           }
 1395         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 1396                  || bidi_it->next_en_pos > bidi_it->charpos)
 1397           type = WEAK_EN;
 1398         break;
 1399       case LRE: /* X3 */
 1400       case LRO: /* X5 */
 1401         bidi_it->type_after_w1 = type;
 1402         bidi_check_type (bidi_it->type_after_w1);
 1403         type = WEAK_BN; /* X9/Retaining */
 1404         if (bidi_it->ignore_bn_limit <= -1)
 1405           {
 1406             if (current_level <= BIDI_MAXLEVEL - 5)
 1407               {
 1408                 /* Compute the least even embedding level greater than
 1409                    the current level.  */
 1410                 new_level = ((current_level + 2) & ~1);
 1411                 if (bidi_it->type_after_w1 == LRE)
 1412                   override = NEUTRAL_DIR;
 1413                 else
 1414                   override = L2R;
 1415                 bidi_push_embedding_level (bidi_it, new_level, override);
 1416               }
 1417             else
 1418               {
 1419                 bidi_it->invalid_levels++;
 1420                 /* invalid_rl_levels counts invalid levels encountered
 1421                    while the embedding level was already too high for
 1422                    LRE/LRO, but not for RLE/RLO.  That is because
 1423                    there may be exactly one PDF which we should not
 1424                    ignore even though invalid_levels is non-zero.
 1425                    invalid_rl_levels helps to know what PDF is
 1426                    that.  */
 1427                 if (bidi_it->invalid_rl_levels >= 0)
 1428                   bidi_it->invalid_rl_levels++;
 1429               }
 1430           }
 1431         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 1432                  || bidi_it->next_en_pos > bidi_it->charpos)
 1433           type = WEAK_EN;
 1434         break;
 1435       case PDF: /* X7 */
 1436         bidi_it->type_after_w1 = type;
 1437         bidi_check_type (bidi_it->type_after_w1);
 1438         type = WEAK_BN; /* X9/Retaining */
 1439         if (bidi_it->ignore_bn_limit <= -1)
 1440           {
                /* When invalid_rl_levels is exactly zero, pop a level and
                   reset the counter to -1.  */
 1441             if (!bidi_it->invalid_rl_levels)
 1442               {
 1443                 new_level = bidi_pop_embedding_level (bidi_it);
 1444                 bidi_it->invalid_rl_levels = -1;
 1445                 if (bidi_it->invalid_levels)
 1446                   bidi_it->invalid_levels--;
 1447                 /* else nothing: UAX#9 says to ignore invalid PDFs */
 1448               }
                /* With no invalid embeddings outstanding, pop a level;
                   otherwise this PDF only decrements both counters.  */
 1449             if (!bidi_it->invalid_levels)
 1450               new_level = bidi_pop_embedding_level (bidi_it);
 1451             else
 1452               {
 1453                 bidi_it->invalid_levels--;
 1454                 bidi_it->invalid_rl_levels--;
 1455               }
 1456           }
 1457         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
 1458                  || bidi_it->next_en_pos > bidi_it->charpos)
 1459           type = WEAK_EN;
 1460         break;
 1461       default:
 1462         /* Nothing.  */
 1463         break;
 1464     }
 1465
 1466   bidi_it->type = type;
 1467   bidi_check_type (bidi_it->type);
 1468
 1469   return new_level;
 1470 }
1471
1472 /* Given an iterator state in BIDI_IT, advance one character position
1473    in the buffer/string to the next character (in the logical order),
1474    resolve any explicit embeddings and directional overrides, and
1475    return the embedding level of the character after resolving
1476    explicit directives and ignoring empty embeddings.  */
1477 static int
1478 bidi_resolve_explicit (struct bidi_it *bidi_it)
1479 {
1480   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1481   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1482   EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
1483   const unsigned char *s = STRINGP (bidi_it->string.lstring)
1484     ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1485
1486   if (prev_level < new_level
1487       && bidi_it->type == WEAK_BN
1488       && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1489       && bidi_it->charpos < eob         /* not already at EOB */
1490       && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1491                                                    + bidi_it->ch_len, s,
1492                                                    bidi_it->string.unibyte)))
1493     {
1494       /* Avoid pushing and popping embedding levels if the level run
1495          is empty, as this breaks level runs where it shouldn't.
1496          UAX#9 removes all the explicit embedding and override codes,
1497          so empty embeddings disappear without a trace.  We need to
1498          behave as if we did the same.  */
1499       struct bidi_it saved_it;
1500       int level = prev_level;
1501
1502       bidi_copy_it (&saved_it, bidi_it);
1503
1504       while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
1505                                                        + bidi_it->ch_len, s,
1506                                                        bidi_it->string.unibyte)))
1507         {
1508           /* This advances to the next character, skipping any
1509              characters covered by display strings.  */
1510           level = bidi_resolve_explicit_1 (bidi_it);
1511           /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1512              a pointer to its data is no longer valid.  */
1513           if (STRINGP (bidi_it->string.lstring))
1514             s = SDATA (bidi_it->string.lstring);
1515         }
1516
1517       if (bidi_it->nchars <= 0)
1518         abort ();
1519       if (level == prev_level)  /* empty embedding */
1520         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1521       else                      /* this embedding is non-empty */
1522         saved_it.ignore_bn_limit = -2;
1523
1524       bidi_copy_it (bidi_it, &saved_it);
1525       if (bidi_it->ignore_bn_limit > -1)
1526         {
1527           /* We pushed a level, but we shouldn't have.  Undo that. */
1528           if (!bidi_it->invalid_rl_levels)
1529             {
1530               new_level = bidi_pop_embedding_level (bidi_it);
1531               bidi_it->invalid_rl_levels = -1;
1532               if (bidi_it->invalid_levels)
1533                 bidi_it->invalid_levels--;
1534             }
1535           if (!bidi_it->invalid_levels)
1536             new_level = bidi_pop_embedding_level (bidi_it);
1537           else
1538             {
1539               bidi_it->invalid_levels--;
1540               bidi_it->invalid_rl_levels--;
1541             }
1542         }
1543     }
1544
1545   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1546     {
1547       bidi_set_paragraph_end (bidi_it);
1548       /* This is needed by bidi_resolve_weak below, and in L1.  */
1549       bidi_it->type_after_w1 = bidi_it->type;
1550       bidi_check_type (bidi_it->type_after_w1);
1551     }
1552
1553   return new_level;
1554 }
1555
1556 /* Advance in the buffer/string, resolve weak types and return the
1557    type of the next character after weak type resolution.  */
1558 static bidi_type_t
1559 bidi_resolve_weak (struct bidi_it *bidi_it)
1560 {
1561   bidi_type_t type;
1562   bidi_dir_t override;
1563   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1564   int new_level  = bidi_resolve_explicit (bidi_it);
1565   int next_char;
1566   bidi_type_t type_of_next;
1567   struct bidi_it saved_it;
1568   EMACS_INT eob =
1569     (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1570     ? bidi_it->string.schars : ZV;
1571
1572   type = bidi_it->type;
1573   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1574
1575   if (type == UNKNOWN_BT
1576       || type == LRE
1577       || type == LRO
1578       || type == RLE
1579       || type == RLO
1580       || type == PDF)
1581     abort ();
1582
1583   if (new_level != prev_level
1584       || bidi_it->type == NEUTRAL_B)
1585     {
1586       /* We've got a new embedding level run, compute the directional
1587          type of sor and initialize per-run variables (UAX#9, clause
1588          X10).  */
1589       bidi_set_sor_type (bidi_it, prev_level, new_level);
1590     }
1591   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1592            || type == WEAK_BN || type == STRONG_AL)
1593     bidi_it->type_after_w1 = type;      /* needed in L1 */
1594   bidi_check_type (bidi_it->type_after_w1);
1595
1596   /* Level and directional override status are already recorded in
1597      bidi_it, and do not need any change; see X6.  */
1598   if (override == R2L)          /* X6 */
1599     type = STRONG_R;
1600   else if (override == L2R)
1601     type = STRONG_L;
1602   else
1603     {
1604       if (type == WEAK_NSM)     /* W1 */
1605         {
1606           /* Note that we don't need to consider the case where the
1607              prev character has its type overridden by an RLO or LRO,
1608              because then either the type of this NSM would have been
1609              also overridden, or the previous character is outside the
1610              current level run, and thus not relevant to this NSM.
1611              This is why NSM gets the type_after_w1 of the previous
1612              character.  */
1613           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1614               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1615               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1616             type = bidi_it->prev.type_after_w1;
1617           else if (bidi_it->sor == R2L)
1618             type = STRONG_R;
1619           else if (bidi_it->sor == L2R)
1620             type = STRONG_L;
1621           else /* shouldn't happen! */
1622             abort ();
1623         }
1624       if (type == WEAK_EN       /* W2 */
1625           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1626         type = WEAK_AN;
1627       else if (type == STRONG_AL) /* W3 */
1628         type = STRONG_R;
1629       else if ((type == WEAK_ES /* W4 */
1630                 && bidi_it->prev.type_after_w1 == WEAK_EN
1631                 && bidi_it->prev.orig_type == WEAK_EN)
1632                || (type == WEAK_CS
1633                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1634                         && bidi_it->prev.orig_type == WEAK_EN)
1635                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1636         {
1637           const unsigned char *s =
1638             STRINGP (bidi_it->string.lstring)
1639             ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1640
1641           next_char =
1642             bidi_it->charpos + bidi_it->nchars >= eob
1643             ? BIDI_EOB
1644             : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1645                                 bidi_it->string.unibyte);
1646           type_of_next = bidi_get_type (next_char, override);
1647
1648           if (type_of_next == WEAK_BN
1649               || bidi_explicit_dir_char (next_char))
1650             {
1651               bidi_copy_it (&saved_it, bidi_it);
1652               while (bidi_resolve_explicit (bidi_it) == new_level
1653                      && bidi_it->type == WEAK_BN)
1654                 ;
1655               type_of_next = bidi_it->type;
1656               bidi_copy_it (bidi_it, &saved_it);
1657             }
1658
1659           /* If the next character is EN, but the last strong-type
1660              character is AL, that next EN will be changed to AN when
1661              we process it in W2 above.  So in that case, this ES
1662              should not be changed into EN.  */
1663           if (type == WEAK_ES
1664               && type_of_next == WEAK_EN
1665               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1666             type = WEAK_EN;
1667           else if (type == WEAK_CS)
1668             {
1669               if (bidi_it->prev.type_after_w1 == WEAK_AN
1670                   && (type_of_next == WEAK_AN
1671                       /* If the next character is EN, but the last
1672                          strong-type character is AL, EN will be later
1673                          changed to AN when we process it in W2 above.
1674                          So in that case, this ES should not be
1675                          changed into EN.  */
1676                       || (type_of_next == WEAK_EN
1677                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1678                 type = WEAK_AN;
1679               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1680                        && type_of_next == WEAK_EN
1681                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1682                 type = WEAK_EN;
1683             }
1684         }
1685       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1686                || type == WEAK_BN)      /* W5/Retaining */
1687         {
1688           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1689               || bidi_it->next_en_pos > bidi_it->charpos)
1690             type = WEAK_EN;
1691           else                  /* W5: ET/BN with EN after it.  */
1692             {
1693               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1694               const unsigned char *s =
1695                 STRINGP (bidi_it->string.lstring)
1696                 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1697
1698               if (bidi_it->nchars <= 0)
1699                 abort ();
1700               next_char =
1701                 bidi_it->charpos + bidi_it->nchars >= eob
1702                 ? BIDI_EOB
1703                 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1704                                     bidi_it->string.unibyte);
1705               type_of_next = bidi_get_type (next_char, override);
1706
1707               if (type_of_next == WEAK_ET
1708                   || type_of_next == WEAK_BN
1709                   || bidi_explicit_dir_char (next_char))
1710                 {
1711                   bidi_copy_it (&saved_it, bidi_it);
1712                   while (bidi_resolve_explicit (bidi_it) == new_level
1713                          && (bidi_it->type == WEAK_BN
1714                              || bidi_it->type == WEAK_ET))
1715                     ;
1716                   type_of_next = bidi_it->type;
1717                   en_pos = bidi_it->charpos;
1718                   bidi_copy_it (bidi_it, &saved_it);
1719                 }
1720               if (type_of_next == WEAK_EN)
1721                 {
1722                   /* If the last strong character is AL, the EN we've
1723                      found will become AN when we get to it (W2). */
1724                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1725                     {
1726                       type = WEAK_EN;
1727                       /* Remember this EN position, to speed up processing
1728                          of the next ETs.  */
1729                       bidi_it->next_en_pos = en_pos;
1730                     }
1731                   else if (type == WEAK_BN)
1732                     type = NEUTRAL_ON; /* W6/Retaining */
1733                 }
1734             }
1735         }
1736     }
1737
1738   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1739       || (type == WEAK_BN
1740           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1741               || bidi_it->prev.type_after_w1 == WEAK_ES
1742               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1743     type = NEUTRAL_ON;
1744
1745   /* Store the type we've got so far, before we clobber it with strong
1746      types in W7 and while resolving neutral types.  But leave alone
1747      the original types that were recorded above, because we will need
1748      them for the L1 clause.  */
1749   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1750     bidi_it->type_after_w1 = type;
1751   bidi_check_type (bidi_it->type_after_w1);
1752
1753   if (type == WEAK_EN)  /* W7 */
1754     {
1755       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1756           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1757         type = STRONG_L;
1758     }
1759
1760   bidi_it->type = type;
1761   bidi_check_type (bidi_it->type);
1762   return type;
1763 }
1764
1765 /* Resolve the type of a neutral character according to the type of
1766    surrounding strong text and the current embedding level.  */
1767 static inline bidi_type_t
1768 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1769 {
1770   /* N1: European and Arabic numbers are treated as though they were R.  */
1771   if (next_type == WEAK_EN || next_type == WEAK_AN)
1772     next_type = STRONG_R;
1773   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1774     prev_type = STRONG_R;
1775
1776   if (next_type == prev_type)   /* N1 */
1777     return next_type;
1778   else if ((lev & 1) == 0)      /* N2 */
1779     return STRONG_L;
1780   else
1781     return STRONG_R;
1782 }
1783
1784 static bidi_type_t
1785 bidi_resolve_neutral (struct bidi_it *bidi_it)
1786 {
       /* Run bidi_resolve_weak on BIDI_IT and, if the type it yields is
          a neutral (or a BN seen while the embedding level stayed the
          same), replace that type with the one bidi_resolve_neutral_1
          computes from the types recorded on either side of the run of
          neutrals.  Returns the resulting type.  */

       /* The level on top of the level stack is sampled before and
          after the call to bidi_resolve_weak, so that the two values
          can be compared below.  */
1787   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1788   bidi_type_t type = bidi_resolve_weak (bidi_it);
1789   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1790
       /* Any type other than the ones listed here is treated as a fatal
          internal error.  */
1791   if (!(type == STRONG_R
1792         || type == STRONG_L
1793         || type == WEAK_BN
1794         || type == WEAK_EN
1795         || type == WEAK_AN
1796         || type == NEUTRAL_B
1797         || type == NEUTRAL_S
1798         || type == NEUTRAL_WS
1799         || type == NEUTRAL_ON))
1800     abort ();
1801
1802   if (bidi_get_category (type) == NEUTRAL
1803       || (type == WEAK_BN && prev_level == current_level))
1804     {
           /* A type is already recorded in next_for_neutral: resolve
              against it without scanning ahead.  */
1805       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1806         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1807                                        bidi_it->next_for_neutral.type,
1808                                        current_level);
1809       else
1810         {
1811           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1812              the assumption of batch-style processing; see clauses W4,
1813              W5, and especially N1, which require to look far forward
1814              (as well as back) in the buffer/string.  May the fleas of
1815              a thousand camels infest the armpits of those who design
1816              supposedly general-purpose algorithms by looking at their
1817              own implementations, and fail to consider other possible
1818              implementations!  */
1819           struct bidi_it saved_it;
1820           bidi_type_t next_type;
1821
               /* The look-ahead below only works for a forward scan; a
                  backward scan reaching this point is a fatal error.  */
1822           if (bidi_it->scan_dir == -1)
1823             abort ();
1824
               /* Keep a copy of the current state; BIDI_IT itself is
                  used for the look-ahead and is restored from this
                  copy before returning.  */
1825           bidi_copy_it (&saved_it, bidi_it);
1826           /* Scan the text forward until we find the first non-neutral
1827              character, and then use that to resolve the neutral we
1828              are dealing with now.  We also cache the scanned iterator
1829              states, to salvage some of the effort later.  */
1830           bidi_cache_iterator_state (bidi_it, 0);
1831           do {
1832             /* Record the info about the previous character, so that
1833                it will be cached below with this state.  */
1834             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1835                 && bidi_it->type != WEAK_BN)
1836               bidi_remember_char (&bidi_it->prev, bidi_it);
1837             type = bidi_resolve_weak (bidi_it);
1838             /* Paragraph separators have their levels fully resolved
1839                at this point, so cache them as resolved.  */
1840             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1841             /* FIXME: implement L1 here, by testing for a newline and
1842                resetting the level for any sequence of whitespace
1843                characters adjacent to it.  */
1844           } while (!(type == NEUTRAL_B
1845                      || (type != WEAK_BN
1846                          && bidi_get_category (type) != NEUTRAL)
1847                      /* This is all per level run, so stop when we
1848                         reach the end of this level run.  */
1849                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1850                      current_level));
1851
               /* Record the character that stopped the scan in the saved
                  state, which is the state put back into BIDI_IT
                  below.  */
1852           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1853
1854           switch (type)
1855             {
1856               case STRONG_L:
1857               case STRONG_R:
1858               case STRONG_AL:
1859                 next_type = type;
1860                 break;
1861               case WEAK_EN:
1862               case WEAK_AN:
1863                 /* N1: ``European and Arabic numbers are treated as
1864                    though they were R.''  */
1865                 next_type = STRONG_R;
1866                 saved_it.next_for_neutral.type = STRONG_R;
1867                 break;
1868               case WEAK_BN:
1869                 if (!bidi_explicit_dir_char (bidi_it->ch))
1870                   abort ();             /* can't happen: BNs are skipped */
1871                 /* FALLTHROUGH */
1872               case NEUTRAL_B:
1873                 /* Marched all the way to the end of this level run.
1874                    We need to use the eor type, whose information is
1875                    stored by bidi_set_sor_type in the prev_for_neutral
1876                    member.  */
1877                 if (saved_it.type != WEAK_BN
1878                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1879                   {
1880                     next_type = bidi_it->prev_for_neutral.type;
1881                     saved_it.next_for_neutral.type = next_type;
1882                     bidi_check_type (next_type);
1883                   }
1884                 else
1885                   {
1886                     /* This is a BN which does not adjoin neutrals.
1887                        Leave its type alone.  */
1888                     bidi_copy_it (bidi_it, &saved_it);
1889                     return bidi_it->type;
1890                   }
1891                 break;
1892               default:
1893                 abort ();
1894             }
               /* Resolve against the type found by the scan, store the
                  result in the saved state, and restore BIDI_IT from
                  it, which undoes the look-ahead.  */
1895           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1896                                          next_type, current_level);
1897           saved_it.type = type;
1898           bidi_check_type (type);
1899           bidi_copy_it (bidi_it, &saved_it);
1900         }
1901     }
1902   return type;
1903 }
1904
1905 /* Given an iterator state in BIDI_IT, advance one character position
1906    in the buffer/string to the next character (in the logical order),
1907    resolve the bidi type of that next character, and return that
1908    type.  */
1909 static bidi_type_t
1910 bidi_type_of_next_char (struct bidi_it *bidi_it)
1911 {
1912   bidi_type_t type;
1913
1914   /* This should always be called during a forward scan.  */
1915   if (bidi_it->scan_dir != 1)
1916     abort ();
1917
1918   /* Reset the limit until which to ignore BNs if we step out of the
1919      area where we found only empty levels.  */
1920   if ((bidi_it->ignore_bn_limit > -1
1921        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1922       || (bidi_it->ignore_bn_limit == -2
1923           && !bidi_explicit_dir_char (bidi_it->ch)))
1924     bidi_it->ignore_bn_limit = -1;
1925
1926   type = bidi_resolve_neutral (bidi_it);
1927
1928   return type;
1929 }
1930
1931 /* Given an iterator state BIDI_IT, advance one character position in
1932    the buffer/string to the next character (in the current scan
1933    direction), resolve the embedding and implicit levels of that next
1934    character, and return the resulting level.  The level is also
        stored in BIDI_IT->resolved_level.  When scanning forward at or
        past the end of the text, or when the character found is a
        paragraph separator, the resolved_level already present in
        BIDI_IT is returned instead.  When scanning backward, a fully
        resolved state must come from the cache; the function aborts
        otherwise.  */
1935 static int
1936 bidi_level_of_next_char (struct bidi_it *bidi_it)
1937 {
1938   bidi_type_t type;
       /* PREV_LEVEL keeps the value -1 unless we are scanning forward.  */
1939   int level, prev_level = -1;
1940   struct bidi_saved_info next_for_neutral;
       /* -2 is lower than any position the cache lookup below accepts
          (its lower bound, bob - 1, is either -1 or 0).  */
1941   EMACS_INT next_char_pos = -2;
1942
1943   if (bidi_it->scan_dir == 1)
1944     {
1945       EMACS_INT eob =
1946         (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1947         ? bidi_it->string.schars : ZV;
1948
1949       /* There's no sense in trying to advance if we hit end of text.  */
1950       if (bidi_it->charpos >= eob)
1951         return bidi_it->resolved_level;
1952
1953       /* Record the info about the previous character.  */
1954       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1955           && bidi_it->type != WEAK_BN)
1956         bidi_remember_char (&bidi_it->prev, bidi_it);
1957       if (bidi_it->type_after_w1 == STRONG_R
1958           || bidi_it->type_after_w1 == STRONG_L
1959           || bidi_it->type_after_w1 == STRONG_AL)
1960         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1961       /* FIXME: it sounds like we don't need both prev and
1962          prev_for_neutral members, but I'm leaving them both for now.  */
1963       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1964           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1965         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1966
1967       /* If we overstepped the characters used for resolving neutrals
1968          and whitespace, invalidate their info in the iterator.  */
1969       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1970         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1971       if (bidi_it->next_en_pos >= 0
1972           && bidi_it->charpos >= bidi_it->next_en_pos)
1973         bidi_it->next_en_pos = -1;
1974       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1975           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1976         bidi_it->next_for_ws.type = UNKNOWN_BT;
1977
1978       /* This must be taken before we fill the iterator with the info
1979          about the next char.  If we scan backwards, the iterator
1980          state must be already cached, so there's no need to know the
1981          embedding level of the previous character, since we will be
1982          returning to our caller shortly.  */
1983       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1984     }
       /* Keep a copy of next_for_neutral; it is put back below if the
          state taken from the cache lacks this information.  */
1985   next_for_neutral = bidi_it->next_for_neutral;
1986
1987   /* Perhaps the character we want is already cached.  If it is, the
1988      call to bidi_cache_find below will return a type other than
1989      UNKNOWN_BT.  */
1990   if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
1991     {
           /* BOB is 0 when iterating over a string and 1 otherwise.  */
1992       int bob =
1993         (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
1994
1995       if (bidi_it->scan_dir > 0)
1996         {
1997           if (bidi_it->nchars <= 0)
1998             abort ();
1999           next_char_pos = bidi_it->charpos + bidi_it->nchars;
2000         }
2001       else if (bidi_it->charpos >= bob)
2002         /* Implementation note: we allow next_char_pos to be as low as
2003            0 for buffers or -1 for strings, and that is okay because
2004            that's the "position" of the sentinel iterator state we
2005            cached at the beginning of the iteration.  */
2006         next_char_pos = bidi_it->charpos - 1;
2007       if (next_char_pos >= bob - 1)
2008         type = bidi_cache_find (next_char_pos, -1, bidi_it);
2009       else
2010         type = UNKNOWN_BT;
2011     }
2012   else
2013     type = UNKNOWN_BT;
2014   if (type != UNKNOWN_BT)
2015     {
2016       /* Don't lose the information for resolving neutrals!  The
2017          cached states could have been cached before their
2018          next_for_neutral member was computed.  If we are on our way
2019          forward, we can simply take the info from the previous
2020          state.  */
2021       if (bidi_it->scan_dir == 1
2022           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2023         bidi_it->next_for_neutral = next_for_neutral;
2024
2025       /* If resolved_level is -1, it means this state was cached
2026          before it was completely resolved, so we cannot return
2027          it.  */
2028       if (bidi_it->resolved_level != -1)
2029         return bidi_it->resolved_level;
2030     }
2031   if (bidi_it->scan_dir == -1)
2032     /* If we are going backwards, the iterator state is already cached
2033        from previous scans, and should be fully resolved.  */
2034     abort ();
2035
       /* Nothing was found in the cache: resolve the type from scratch.
          A cached state that was not fully resolved keeps the type
          returned by the cache lookup.  */
2036   if (type == UNKNOWN_BT)
2037     type = bidi_type_of_next_char (bidi_it);
2038
       /* For a paragraph separator, return the level already stored in
          BIDI_IT without going through the steps below.  */
2039   if (type == NEUTRAL_B)
2040     return bidi_it->resolved_level;
2041
2042   level = bidi_it->level_stack[bidi_it->stack_idx].level;
2043   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2044       || (type == WEAK_BN && prev_level == level))
2045     {
2046       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2047         abort ();
2048
2049       /* If the cached state shows a neutral character, it was not
2050          resolved by bidi_resolve_neutral, so do it now.  */
2051       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2052                                      bidi_it->next_for_neutral.type,
2053                                      level);
2054     }
2055
       /* At this point only strong types, numbers and BN are acceptable;
          anything else is a fatal internal error.  */
2056   if (!(type == STRONG_R
2057         || type == STRONG_L
2058         || type == WEAK_BN
2059         || type == WEAK_EN
2060         || type == WEAK_AN))
2061     abort ();
2062   bidi_it->type = type;
2063   bidi_check_type (bidi_it->type);
2064
2065   /* For L1 below, we need to know, for each WS character, whether
2066      it belongs to a sequence of WS characters preceding a newline
2067      or a TAB or a paragraph separator.  */
2068   if (bidi_it->orig_type == NEUTRAL_WS
2069       && bidi_it->next_for_ws.type == UNKNOWN_BT)
2070     {
           /* The look-ahead works on local copies of the position data,
              so the position fields of BIDI_IT are not modified by
              it.  */
2071       int ch;
2072       EMACS_INT clen = bidi_it->ch_len;
2073       EMACS_INT bpos = bidi_it->bytepos;
2074       EMACS_INT cpos = bidi_it->charpos;
2075       EMACS_INT disp_pos = bidi_it->disp_pos;
2076       EMACS_INT nc = bidi_it->nchars;
2077       struct bidi_string_data bs = bidi_it->string;
2078       bidi_type_t chtype;
2079       int fwp = bidi_it->frame_window_p;
2080       int dpp = bidi_it->disp_prop_p;
2081
2082       if (bidi_it->nchars <= 0)
2083         abort ();
           /* Fetch characters until one is found that is neither
              whitespace, nor BN, nor an explicit directional character;
              newline and end of text count as paragraph separators
              here.  */
2084       do {
2085         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2086                               fwp, &clen, &nc);
2087         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
2088           chtype = NEUTRAL_B;
2089         else
2090           chtype = bidi_get_type (ch, NEUTRAL_DIR);
2091       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2092                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2093       bidi_it->next_for_ws.type = chtype;
2094       bidi_check_type (bidi_it->next_for_ws.type);
2095       bidi_it->next_for_ws.charpos = cpos;
2096       bidi_it->next_for_ws.bytepos = bpos;
2097     }
2098
2099   /* Resolve implicit levels, with a twist: PDFs get the embedding
2100      level of the embedding they terminate.  See below for the
2101      reason.  */
2102   if (bidi_it->orig_type == PDF
2103       /* Don't do this if this formatting code didn't change the
2104          embedding level due to invalid or empty embeddings.  */
2105       && prev_level != level)
2106     {
2107       /* Don't look in UAX#9 for the reason for this: it's our own
2108          private quirk.  The reason is that we want the formatting
2109          codes to be delivered so that they bracket the text of their
2110          embedding.  For example, given the text
2111
2112              {RLO}teST{PDF}
2113
2114          we want it to be displayed as
2115
2116              {PDF}STet{RLO}
2117
2118          not as
2119
2120              STet{RLO}{PDF}
2121
2122          which will result because we bump up the embedding level as
2123          soon as we see the RLO and pop it as soon as we see the PDF,
2124          so RLO itself has the same embedding level as "teST", and
2125          thus would be normally delivered last, just before the PDF.
2126          The switch below fiddles with the level of PDF so that this
2127          ugly side effect does not happen.
2128
2129          (This is, of course, only important if the formatting codes
2130          are actually displayed, but Emacs does need to display them
2131          if the user wants to.)  */
2132       level = prev_level;
2133     }
2134   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2135            || bidi_it->orig_type == NEUTRAL_S
2136            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2137            /* || bidi_it->ch == LINESEP_CHAR */
2138            || (bidi_it->orig_type == NEUTRAL_WS
2139                && (bidi_it->next_for_ws.type == NEUTRAL_B
2140                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
         /* Separators, and whitespace leading up to one, get the level
            stored at the bottom of the level stack.  */
2141     level = bidi_it->level_stack[0].level;
2142   else if ((level & 1) == 0) /* I1 */
2143     {
           /* Even level: R goes up by one level, numbers by two.  */
2144       if (type == STRONG_R)
2145         level++;
2146       else if (type == WEAK_EN || type == WEAK_AN)
2147         level += 2;
2148     }
2149   else                  /* I2 */
2150     {
           /* Odd level: L and numbers go up by one level.  */
2151       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2152         level++;
2153     }
2154
       /* Record the final level in the iterator before returning it.  */
2155   bidi_it->resolved_level = level;
2156   return level;
2157 }
2158
2159 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
2160    non-zero, we are at the end of a level, and we need to prepare to
2161    resume the scan of the lower level.
2162
2163    If this level's other edge is cached, we simply jump to it, filling
2164    the iterator structure with the iterator state on the other edge.
2165    Otherwise, we walk the buffer or string until we come back to the
2166    same level as LEVEL.
2167
2168    Note: we are not talking here about a ``level run'' in the UAX#9
2169    sense of the term, but rather about a ``level'' which includes
2170    all the levels higher than it.  In other words, given the levels
2171    like this:
2172
2173          11111112222222333333334443343222222111111112223322111
2174                 A      B                    C
2175
2176    and assuming we are at point A scanning left to right, this
2177    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2178    at point B.  */
2179 static void
2180 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2181 {
2182   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2183   ptrdiff_t idx;
2184
2185   /* Try the cache first.  */
2186   if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2187       >= bidi_cache_start)
2188     bidi_cache_fetch_state (idx, bidi_it);
2189   else
2190     {
2191       int new_level;
2192
2193       if (end_flag)
2194         abort (); /* if we are at end of level, its edges must be cached */
2195
2196       bidi_cache_iterator_state (bidi_it, 1);
2197       do {
2198         new_level = bidi_level_of_next_char (bidi_it);
2199         bidi_cache_iterator_state (bidi_it, 1);
2200       } while (new_level >= level);
2201     }
2202 }
2203
2204 void
2205 bidi_move_to_visually_next (struct bidi_it *bidi_it)
2206 {
       /* Main entry point of the reordering engine: update BIDI_IT in
          place so that it describes the next character in the visual
          order.  Aborts if BIDI_IT holds a negative character or byte
          position.  */
2207   int old_level, new_level, next_level;
2208   struct bidi_it sentinel;
2209   struct gcpro gcpro1;
2210
2211   if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2212     abort ();
2213
2214   if (bidi_it->scan_dir == 0)
2215     {
2216       bidi_it->scan_dir = 1;    /* default to logical order */
2217     }
2218
2219   /* The code below can call eval, and thus cause GC.  If we are
2220      iterating a Lisp string, make sure it won't be GCed.  */
2221   if (STRINGP (bidi_it->string.lstring))
2222     GCPRO1 (bidi_it->string.lstring);
2223
2224   /* If we just passed a newline, initialize for the next line.  */
2225   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
2226     bidi_line_init (bidi_it);
2227
2228   /* Prepare the sentinel iterator state, and cache it.  When we bump
2229      into it, scanning backwards, we'll know that the last non-base
2230      level is exhausted.  */
2231   if (bidi_cache_idx == bidi_cache_start)
2232     {
2233       bidi_copy_it (&sentinel, bidi_it);
2234       if (bidi_it->first_elt)
2235         {
2236           sentinel.charpos--;   /* cached charpos needs to be monotonic */
2237           sentinel.bytepos--;
2238           sentinel.ch = '\n';   /* doesn't matter, but why not? */
2239           sentinel.ch_len = 1;
2240           sentinel.nchars = 1;
2241         }
2242       bidi_cache_iterator_state (&sentinel, 1);
2243     }
2244
       /* OLD_LEVEL is the resolved level of the character we are leaving,
          NEW_LEVEL the one of the next character in the current scan
          direction.  */
2245   old_level = bidi_it->resolved_level;
2246   new_level = bidi_level_of_next_char (bidi_it);
2247
2248   /* Reordering of resolved levels (clause L2) is implemented by
2249      jumping to the other edge of the level and flipping direction of
2250      scanning the text whenever we find a level change.  */
2251   if (new_level != old_level)
2252     {
           /* When the level goes up, the level whose other edge we look
              for is one above the old level; when it goes down, it is
              the old level itself.  INCR is the step by which the
              expected level changes on each iteration of the loop
              below.  */
2253       int ascending = new_level > old_level;
2254       int level_to_search = ascending ? old_level + 1 : old_level;
2255       int incr = ascending ? 1 : -1;
2256       int expected_next_level = old_level + incr;
2257
2258       /* Jump (or walk) to the other edge of this level.  */
2259       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2260       /* Switch scan direction and peek at the next character in the
2261          new direction.  */
2262       bidi_it->scan_dir = -bidi_it->scan_dir;
2263
2264       /* The following loop handles the case where the resolved level
2265          jumps by more than one.  This is typical for numbers inside a
2266          run of text with left-to-right embedding direction, but can
2267          also happen in other situations.  In those cases the decision
2268          where to continue after a level change, and in what direction,
2269          is tricky.  For example, given a text like below:
2270
2271                   abcdefgh
2272                   11336622
2273
2274          (where the numbers below the text show the resolved levels),
2275          the result of reordering according to UAX#9 should be this:
2276
2277                   efdcghba
2278
2279          This is implemented by the loop below which flips direction
2280          and jumps to the other edge of the level each time it finds
2281          the new level not to be the expected one.  The expected level
2282          is always one more or one less than the previous one.  */
2283       next_level = bidi_peek_at_next_level (bidi_it);
2284       while (next_level != expected_next_level)
2285         {
2286           expected_next_level += incr;
2287           level_to_search += incr;
2288           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2289           bidi_it->scan_dir = -bidi_it->scan_dir;
2290           next_level = bidi_peek_at_next_level (bidi_it);
2291         }
2292
2293       /* Finally, deliver the next character in the new direction.  */
2294       next_level = bidi_level_of_next_char (bidi_it);
2295     }
2296
2297   /* Take note when we have just processed the newline that precedes
2298      the end of the paragraph.  The next time we are about to be
2299      called, set_iterator_to_next will automatically reinit the
2300      paragraph direction, if needed.  We do this at the newline before
2301      the paragraph separator, because the next character might not be
2302      the first character of the next paragraph, due to the bidi
2303      reordering, whereas we _must_ know the paragraph base direction
2304      _before_ we process the paragraph's text, since the base
2305      direction affects the reordering.  */
2306   if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
2307     {
2308       /* The paragraph direction of the entire string, once
2309          determined, is in effect for the entire string.  Setting the
2310          separator limit to the end of the string prevents
2311          bidi_paragraph_init from being called automatically on this
2312          string.  */
2313       if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
2314         bidi_it->separator_limit = bidi_it->string.schars;
2315       else if (bidi_it->bytepos < ZV_BYTE)
2316         {
2317           EMACS_INT sep_len =
2318             bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2319                                    bidi_it->bytepos + bidi_it->ch_len);
               /* NOTE(review): this sanity check runs only after nchars
                  has already been used in the call above.  */
2320           if (bidi_it->nchars <= 0)
2321             abort ();
               /* A negative SEP_LEN leaves new_paragraph and
                  separator_limit untouched.  */
2322           if (sep_len >= 0)
2323             {
2324               bidi_it->new_paragraph = 1;
2325               /* Record the buffer position of the last character of the
2326                  paragraph separator.  */
2327               bidi_it->separator_limit =
2328                 bidi_it->charpos + bidi_it->nchars + sep_len;
2329             }
2330         }
2331     }
2332
2333   if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
2334     {
2335       /* If we are at paragraph's base embedding level and beyond the
2336          last cached position, the cache's job is done and we can
2337          discard it.  */
2338       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
2339           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2340                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
2341         bidi_cache_reset ();
2342         /* But as long as we are caching during forward scan, we must
2343            cache each state, or else the cache integrity will be
2344            compromised: it assumes cached states correspond to buffer
2345            positions 1:1.  */
2346       else
2347         bidi_cache_iterator_state (bidi_it, 1);
2348     }
2349
       /* Pairs with the GCPRO1 near the top of this function, which is
          guarded by the same test.  */
2350   if (STRINGP (bidi_it->string.lstring))
2351     UNGCPRO;
2352 }
2353
2354 /* This is meant to be called from within the debugger, whenever you
2355    wish to examine the cache contents.  */
2356 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
2357 void
2358 bidi_dump_cached_states (void)
2359 {
2360   ptrdiff_t i;
2361   int ndigits = 1;
2362
2363   if (bidi_cache_idx == 0)
2364     {
2365       fprintf (stderr, "The cache is empty.\n");
2366       return;
2367     }
2368   fprintf (stderr, "Total of  %"pD"d state%s in cache:\n",
2369            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2370
2371   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2372     ndigits++;
2373   fputs ("ch  ", stderr);
2374   for (i = 0; i < bidi_cache_idx; i++)
2375     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2376   fputs ("\n", stderr);
2377   fputs ("lvl ", stderr);
2378   for (i = 0; i < bidi_cache_idx; i++)
2379     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2380   fputs ("\n", stderr);
2381   fputs ("pos ", stderr);
2382   for (i = 0; i < bidi_cache_idx; i++)
2383     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
2384   fputs ("\n", stderr);
2385 }