src/bidi.c

   1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
   2    Copyright (C) 2000-2001, 2004-2005, 2009-2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software: you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation, either version 3 of the License, or
  10 (at your option) any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 /* Written by Eli Zaretskii <eliz@gnu.org>.
  21
  22    A sequential implementation of the Unicode Bidirectional algorithm,
  23    as per UAX#9, a part of the Unicode Standard.
  24
  25    Unlike the reference and most other implementations, this one is
  26    designed to be called once for every character in the buffer or
  27    string.
  28
  29    The main entry point is bidi_move_to_visually_next.  Each time it
  30    is called, it finds the next character in the visual order, and
  31    returns its information in a special structure.  The caller is then
  32    expected to process this character for display or any other
  33    purposes, and call bidi_move_to_visually_next for the next
  34    character.  See the comments in bidi_move_to_visually_next for more
  35    details about its algorithm that finds the next visual-order
  36    character by resolving their levels on the fly.
  37
  38    The two other entry points are bidi_paragraph_init and
  39    bidi_mirror_char.  The first determines the base direction of a
  40    paragraph, while the second returns the mirrored version of its
  41    argument character.
  42
  43    If you want to understand the code, you will have to read it
  44    together with the relevant portions of UAX#9.  The comments include
  45    references to UAX#9 rules, for that very reason.
  46
  47    A note about references to UAX#9 rules: if the reference says
  48    something like "X9/Retaining", it means that you need to refer to
  49    rule X9 and to its modifications decribed in the "Implementation
  50    Notes" section of UAX#9, under "Retaining Format Codes".  */
  51
  52 #include <config.h>
  53 #include <stdio.h>
  54 #include <setjmp.h>
  55
  56 #include "lisp.h"
  57 #include "buffer.h"
  58 #include "character.h"
  59 #include "dispextern.h"
  60
  61 static int bidi_initialized = 0;
  62
  63 static Lisp_Object bidi_type_table, bidi_mirror_table;
  64
  65 #define LRM_CHAR   0x200E
  66 #define RLM_CHAR   0x200F
  67 #define BIDI_EOB   -1
  68
  69 /* Local data structures.  (Look in dispextern.h for the rest.)  */
  70
  71 /* What we need to know about the current paragraph.  */
  72 struct bidi_paragraph_info {
  73   EMACS_INT start_bytepos;      /* byte position where it begins */
  74   EMACS_INT end_bytepos;        /* byte position where it ends */
  75   int       embedding_level;    /* its basic embedding level */
  76   bidi_dir_t base_dir;          /* its base direction */
  77 };
  78
  79 /* Data type for describing the bidirectional character categories.  */
  80 typedef enum {
  81   UNKNOWN_BC,
  82   NEUTRAL,
  83   WEAK,
  84   STRONG
  85 } bidi_category_t;
  86
  87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
  88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
  89
  90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
  91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
  92
  93 static void
  94 bidi_initialize (void)
  95 {
  96
  97 #include "biditype.h"
  98 #include "bidimirror.h"
  99
 100   int i;
 101
 102   bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
 103   staticpro (&bidi_type_table);
 104
 105   for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
 106     char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
 107                           make_number (bidi_type[i].type));
 108
 109   bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
 110   staticpro (&bidi_mirror_table);
 111
 112   for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
 113     char_table_set (bidi_mirror_table, bidi_mirror[i].from,
 114                     make_number (bidi_mirror[i].to));
 115
 116   Qparagraph_start = intern ("paragraph-start");
 117   staticpro (&Qparagraph_start);
 118   paragraph_start_re = Fsymbol_value (Qparagraph_start);
 119   if (!STRINGP (paragraph_start_re))
 120     paragraph_start_re = build_string ("\f\\|[ \t]*$");
 121   staticpro (&paragraph_start_re);
 122   Qparagraph_separate = intern ("paragraph-separate");
 123   staticpro (&Qparagraph_separate);
 124   paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
 125   if (!STRINGP (paragraph_separate_re))
 126     paragraph_separate_re = build_string ("[ \t\f]*$");
 127   staticpro (&paragraph_separate_re);
 128   bidi_initialized = 1;
 129 }
 130
 131 /* Return the bidi type of a character CH, subject to the current
 132    directional OVERRIDE.  */
 133 static inline bidi_type_t
 134 bidi_get_type (int ch, bidi_dir_t override)
 135 {
 136   bidi_type_t default_type;
 137
 138   if (ch == BIDI_EOB)
 139     return NEUTRAL_B;
 140   if (ch < 0 || ch > MAX_CHAR)
 141     abort ();
 142
 143   default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 144
 145   if (override == NEUTRAL_DIR)
 146     return default_type;
 147
 148   switch (default_type)
 149     {
 150       /* Although UAX#9 does not tell, it doesn't make sense to
 151          override NEUTRAL_B and LRM/RLM characters.  */
 152       case NEUTRAL_B:
 153       case LRE:
 154       case LRO:
 155       case RLE:
 156       case RLO:
 157       case PDF:
 158         return default_type;
 159       default:
 160         switch (ch)
 161           {
 162             case LRM_CHAR:
 163             case RLM_CHAR:
 164               return default_type;
 165             default:
 166               if (override == L2R) /* X6 */
 167                 return STRONG_L;
 168               else if (override == R2L)
 169                 return STRONG_R;
 170               else
 171                 abort ();       /* can't happen: handled above */
 172           }
 173     }
 174 }
 175
 176 static void
 177 bidi_check_type (bidi_type_t type)
 178 {
 179   if (type < UNKNOWN_BT || type > NEUTRAL_ON)
 180     abort ();
 181 }
 182
 183 /* Given a bidi TYPE of a character, return its category.  */
 184 static inline bidi_category_t
 185 bidi_get_category (bidi_type_t type)
 186 {
 187   switch (type)
 188     {
 189       case UNKNOWN_BT:
 190         return UNKNOWN_BC;
 191       case STRONG_L:
 192       case STRONG_R:
 193       case STRONG_AL:
 194       case LRE:
 195       case LRO:
 196       case RLE:
 197       case RLO:
 198         return STRONG;
 199       case PDF:         /* ??? really?? */
 200       case WEAK_EN:
 201       case WEAK_ES:
 202       case WEAK_ET:
 203       case WEAK_AN:
 204       case WEAK_CS:
 205       case WEAK_NSM:
 206       case WEAK_BN:
 207         return WEAK;
 208       case NEUTRAL_B:
 209       case NEUTRAL_S:
 210       case NEUTRAL_WS:
 211       case NEUTRAL_ON:
 212         return NEUTRAL;
 213       default:
 214         abort ();
 215     }
 216 }
 217
 218 /* Return the mirrored character of C, if it has one.  If C has no
 219    mirrored counterpart, return C.
 220    Note: The conditions in UAX#9 clause L4 regarding the surrounding
 221    context must be tested by the caller.  */
 222 int
 223 bidi_mirror_char (int c)
 224 {
 225   Lisp_Object val;
 226
 227   if (c == BIDI_EOB)
 228     return c;
 229   if (c < 0 || c > MAX_CHAR)
 230     abort ();
 231
 232   val = CHAR_TABLE_REF (bidi_mirror_table, c);
 233   if (INTEGERP (val))
 234     {
 235       int v = XINT (val);
 236
 237       if (v < 0 || v > MAX_CHAR)
 238         abort ();
 239
 240       return v;
 241     }
 242
 243   return c;
 244 }
 245
 246 /* Copy the bidi iterator from FROM to TO.  To save cycles, this only
 247    copies the part of the level stack that is actually in use.  */
 248 static inline void
 249 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
 250 {
 251   int i;
 252
 253   /* Copy everything except the level stack and beyond.  */
 254   memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
 255
 256   /* Copy the active part of the level stack.  */
 257   to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
 258   for (i = 1; i <= from->stack_idx; i++)
 259     to->level_stack[i] = from->level_stack[i];
 260 }
 261
 262 /* Caching the bidi iterator states.  */
 263
 264 #define BIDI_CACHE_CHUNK 200
 265 static struct bidi_it *bidi_cache;
 266 static ptrdiff_t bidi_cache_size = 0;
 267 enum { elsz = sizeof (struct bidi_it) };
 268 static ptrdiff_t bidi_cache_idx;        /* next unused cache slot */
 269 static ptrdiff_t bidi_cache_last_idx;   /* slot of last cache hit */
 270
 271 static inline void
 272 bidi_cache_reset (void)
 273 {
 274   bidi_cache_idx = 0;
 275   bidi_cache_last_idx = -1;
 276 }
 277
 278 static inline void
 279 bidi_cache_shrink (void)
 280 {
 281   if (bidi_cache_size > BIDI_CACHE_CHUNK)
 282     {
 283       bidi_cache_size = BIDI_CACHE_CHUNK;
 284       bidi_cache =
 285         (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 286     }
 287   bidi_cache_reset ();
 288 }
 289
 290 static inline void
 291 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
 292 {
 293   int current_scan_dir = bidi_it->scan_dir;
 294
 295   if (idx < 0 || idx >= bidi_cache_idx)
 296     abort ();
 297
 298   bidi_copy_it (bidi_it, &bidi_cache[idx]);
 299   bidi_it->scan_dir = current_scan_dir;
 300   bidi_cache_last_idx = idx;
 301 }
 302
 303 /* Find a cached state with a given CHARPOS and resolved embedding
 304    level less or equal to LEVEL.  if LEVEL is -1, disregard the
 305    resolved levels in cached states.  DIR, if non-zero, means search
 306    in that direction from the last cache hit.  */
 307 static inline ptrdiff_t
 308 bidi_cache_search (EMACS_INT charpos, int level, int dir)
 309 {
 310   ptrdiff_t i, i_start;
 311
 312   if (bidi_cache_idx)
 313     {
 314       if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
 315         {
 316           dir = -1;
 317           i_start = bidi_cache_last_idx - 1;
 318         }
 319       else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
 320                           + bidi_cache[bidi_cache_last_idx].nchars - 1))
 321         {
 322           dir = 1;
 323           i_start = bidi_cache_last_idx + 1;
 324         }
 325       else if (dir)
 326         i_start = bidi_cache_last_idx;
 327       else
 328         {
 329           dir = -1;
 330           i_start = bidi_cache_idx - 1;
 331         }
 332
 333       if (dir < 0)
 334         {
 335           /* Linear search for now; FIXME!  */
 336           for (i = i_start; i >= 0; i--)
 337             if (bidi_cache[i].charpos <= charpos
 338                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 339                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 340               return i;
 341         }
 342       else
 343         {
 344           for (i = i_start; i < bidi_cache_idx; i++)
 345             if (bidi_cache[i].charpos <= charpos
 346                 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
 347                 && (level == -1 || bidi_cache[i].resolved_level <= level))
 348               return i;
 349         }
 350     }
 351
 352   return -1;
 353 }
 354
 355 /* Find a cached state where the resolved level changes to a value
 356    that is lower than LEVEL, and return its cache slot index.  DIR is
 357    the direction to search, starting with the last used cache slot.
 358    BEFORE, if non-zero, means return the index of the slot that is
 359    ``before'' the level change in the search direction.  That is,
 360    given the cached levels like this:
 361
 362          1122333442211
 363           AB        C
 364
 365    and assuming we are at the position cached at the slot marked with
 366    C, searching backwards (DIR = -1) for LEVEL = 2 will return the
 367    index of slot B or A, depending whether BEFORE is, respectively,
 368    non-zero or zero.  */
 369 static ptrdiff_t
 370 bidi_cache_find_level_change (int level, int dir, int before)
 371 {
 372   if (bidi_cache_idx)
 373     {
 374       ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
 375       int incr = before ? 1 : 0;
 376
 377       if (!dir)
 378         dir = -1;
 379       else if (!incr)
 380         i += dir;
 381
 382       if (dir < 0)
 383         {
 384           while (i >= incr)
 385             {
 386               if (bidi_cache[i - incr].resolved_level >= 0
 387                   && bidi_cache[i - incr].resolved_level < level)
 388                 return i;
 389               i--;
 390             }
 391         }
 392       else
 393         {
 394           while (i < bidi_cache_idx - incr)
 395             {
 396               if (bidi_cache[i + incr].resolved_level >= 0
 397                   && bidi_cache[i + incr].resolved_level < level)
 398                 return i;
 399               i++;
 400             }
 401         }
 402     }
 403
 404   return -1;
 405 }
 406
 407 static inline void
 408 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
 409 {
 410   ptrdiff_t idx;
 411
 412   /* We should never cache on backward scans.  */
 413   if (bidi_it->scan_dir == -1)
 414     abort ();
 415   idx = bidi_cache_search (bidi_it->charpos, -1, 1);
 416
 417   if (idx < 0)
 418     {
 419       idx = bidi_cache_idx;
 420       /* Enlarge the cache as needed.  */
 421       if (idx >= bidi_cache_size)
 422         {
 423           if (min (PTRDIFF_MAX, SIZE_MAX) / elsz - BIDI_CACHE_CHUNK
 424               < bidi_cache_size)
 425             memory_full (SIZE_MAX);
 426           bidi_cache_size += BIDI_CACHE_CHUNK;
 427           bidi_cache =
 428             (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
 429         }
 430       /* Character positions should correspond to cache positions 1:1.
 431          If we are outside the range of cached positions, the cache is
 432          useless and must be reset.  */
 433       if (idx > 0 &&
 434           (bidi_it->charpos > (bidi_cache[idx - 1].charpos
 435                                + bidi_cache[idx - 1].nchars)
 436            || bidi_it->charpos < bidi_cache[0].charpos))
 437         {
 438           bidi_cache_reset ();
 439           idx = 0;
 440         }
 441       if (bidi_it->nchars <= 0)
 442         abort ();
 443       bidi_copy_it (&bidi_cache[idx], bidi_it);
 444       if (!resolved)
 445         bidi_cache[idx].resolved_level = -1;
 446     }
 447   else
 448     {
 449       /* Copy only the members which could have changed, to avoid
 450          costly copying of the entire struct.  */
 451       bidi_cache[idx].type = bidi_it->type;
 452       bidi_check_type (bidi_it->type);
 453       bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
 454       bidi_check_type (bidi_it->type_after_w1);
 455       if (resolved)
 456         bidi_cache[idx].resolved_level = bidi_it->resolved_level;
 457       else
 458         bidi_cache[idx].resolved_level = -1;
 459       bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
 460       bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
 461       bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
 462       bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
 463       bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
 464     }
 465
 466   bidi_cache_last_idx = idx;
 467   if (idx >= bidi_cache_idx)
 468     bidi_cache_idx = idx + 1;
 469 }
 470
 471 static inline bidi_type_t
 472 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
 473 {
 474   ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
 475
 476   if (i >= 0)
 477     {
 478       bidi_dir_t current_scan_dir = bidi_it->scan_dir;
 479
 480       bidi_copy_it (bidi_it, &bidi_cache[i]);
 481       bidi_cache_last_idx = i;
 482       /* Don't let scan direction from from the cached state override
 483          the current scan direction.  */
 484       bidi_it->scan_dir = current_scan_dir;
 485       return bidi_it->type;
 486     }
 487
 488   return UNKNOWN_BT;
 489 }
 490
 491 static inline int
 492 bidi_peek_at_next_level (struct bidi_it *bidi_it)
 493 {
 494   if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
 495     abort ();
 496   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 497 }
 498
 499 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
 500    Value is the non-negative length of the paragraph separator
 501    following the buffer position, -1 if position is at the beginning
 502    of a new paragraph, or -2 if position is neither at beginning nor
 503    at end of a paragraph.  */
 504 static EMACS_INT
 505 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
 506 {
 507   Lisp_Object sep_re;
 508   Lisp_Object start_re;
 509   EMACS_INT val;
 510
 511   sep_re = paragraph_separate_re;
 512   start_re = paragraph_start_re;
 513
 514   val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
 515   if (val < 0)
 516     {
 517       if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
 518         val = -1;
 519       else
 520         val = -2;
 521     }
 522
 523   return val;
 524 }
 525
 526 /* Determine the start-of-run (sor) directional type given the two
 527    embedding levels on either side of the run boundary.  Also, update
 528    the saved info about previously seen characters, since that info is
 529    generally valid for a single level run.  */
 530 static inline void
 531 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
 532 {
 533   int higher_level = level_before > level_after ? level_before : level_after;
 534
 535   /* The prev_was_pdf gork is required for when we have several PDFs
 536      in a row.  In that case, we want to compute the sor type for the
 537      next level run only once: when we see the first PDF.  That's
 538      because the sor type depends only on the higher of the two levels
 539      that we find on the two sides of the level boundary (see UAX#9,
 540      clause X10), and so we don't need to know the final embedding
 541      level to which we descend after processing all the PDFs.  */
 542   if (!bidi_it->prev_was_pdf || level_before < level_after)
 543     /* FIXME: should the default sor direction be user selectable?  */
 544     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
 545   if (level_before > level_after)
 546     bidi_it->prev_was_pdf = 1;
 547
 548   bidi_it->prev.type = UNKNOWN_BT;
 549   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 550     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 551   bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
 552   bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
 553   bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
 554   bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
 555     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 556   bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
 557 }
 558
 559 /* Perform initializations for reordering a new line of bidi text.  */
 560 static void
 561 bidi_line_init (struct bidi_it *bidi_it)
 562 {
 563   bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
 564   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 565   bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
 566   bidi_it->invalid_levels = 0;
 567   bidi_it->invalid_rl_levels = -1;
 568   bidi_it->next_en_pos = -1;
 569   bidi_it->next_for_ws.type = UNKNOWN_BT;
 570   bidi_set_sor_type (bidi_it,
 571                      bidi_it->paragraph_dir == R2L ? 1 : 0,
 572                      bidi_it->level_stack[0].level); /* X10 */
 573
 574   bidi_cache_reset ();
 575 }
 576
 577 /* Fetch and return the character at BYTEPOS/CHARPOS.  If that
 578    character is covered by a display string, treat the entire run of
 579    covered characters as a single character u+FFFC, and return their
 580    combined length in CH_LEN and NCHARS.  DISP_POS specifies the
 581    character position of the next display string, or -1 if not yet
 582    computed.  When the next character is at or beyond that position,
 583    the function updates DISP_POS with the position of the next display
 584    string.  */
 585 static inline int
 586 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
 587                  int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
 588 {
 589   int ch;
 590
 591   /* FIXME: Support strings in addition to buffers.  */
 592   /* If we got past the last known position of display string, compute
 593      the position of the next one.  That position could be at BYTEPOS.  */
 594   if (charpos < ZV && charpos > *disp_pos)
 595     *disp_pos = compute_display_string_pos (charpos, frame_window_p);
 596
 597   /* Fetch the character at BYTEPOS.  */
 598   if (bytepos >= ZV_BYTE)
 599     {
 600       ch = BIDI_EOB;
 601       *ch_len = 1;
 602       *nchars = 1;
 603       *disp_pos = ZV;
 604     }
 605   else if (charpos >= *disp_pos)
 606     {
 607       EMACS_INT disp_end_pos;
 608
 609       /* We don't expect to find ourselves in the middle of a display
 610          property.  Hopefully, it will never be needed.  */
 611       if (charpos > *disp_pos)
 612         abort ();
 613       /* Return the Unicode Object Replacement Character to represent
 614          the entire run of characters covered by the display
 615          string.  */
 616       ch = 0xFFFC;
 617       disp_end_pos = compute_display_string_end (*disp_pos);
 618       *nchars = disp_end_pos - *disp_pos;
 619       *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
 620     }
 621   else
 622     {
 623       ch = FETCH_MULTIBYTE_CHAR (bytepos);
 624       *nchars = 1;
 625       *ch_len = CHAR_BYTES (ch);
 626     }
 627
 628   /* If we just entered a run of characters covered by a display
 629      string, compute the position of the next display string.  */
 630   if (charpos + *nchars <= ZV && charpos + *nchars > *disp_pos)
 631     *disp_pos = compute_display_string_pos (charpos + *nchars, frame_window_p);
 632
 633   return ch;
 634 }
 635
 636 /* Find the beginning of this paragraph by looking back in the buffer.
 637    Value is the byte position of the paragraph's beginning.  */
 638 static EMACS_INT
 639 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
 640 {
 641   Lisp_Object re = paragraph_start_re;
 642   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
 643
 644   while (pos_byte > BEGV_BYTE
 645          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
 646     {
 647       /* FIXME: What if the paragraph beginning is covered by a
 648          display string?  And what if a display string covering some
 649          of the text over which we scan back includes
 650          paragraph_start_re?  */
 651       pos = find_next_newline_no_quit (pos - 1, -1);
 652       pos_byte = CHAR_TO_BYTE (pos);
 653     }
 654   return pos_byte;
 655 }
 656
 657 /* Determine the base direction, a.k.a. base embedding level, of the
 658    paragraph we are about to iterate through.  If DIR is either L2R or
 659    R2L, just use that.  Otherwise, determine the paragraph direction
 660    from the first strong directional character of the paragraph.
 661
 662    NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
 663    has no strong directional characters and both DIR and
 664    bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
 665    in the buffer until a paragraph is found with a strong character,
 666    or until hitting BEGV.  In the latter case, fall back to L2R.  This
 667    flag is used in current-bidi-paragraph-direction.
 668
 669    Note that this function gives the paragraph separator the same
 670    direction as the preceding paragraph, even though Emacs generally
 671    views the separartor as not belonging to any paragraph.  */
 672 void
 673 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
 674 {
 675   EMACS_INT bytepos = bidi_it->bytepos;
 676   EMACS_INT pstartbyte;
 677
 678   /* Special case for an empty buffer. */
 679   if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
 680     dir = L2R;
 681   /* We should never be called at EOB or before BEGV.  */
 682   else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
 683     abort ();
 684
 685   if (dir == L2R)
 686     {
 687       bidi_it->paragraph_dir = L2R;
 688       bidi_it->new_paragraph = 0;
 689     }
 690   else if (dir == R2L)
 691     {
 692       bidi_it->paragraph_dir = R2L;
 693       bidi_it->new_paragraph = 0;
 694     }
 695   else if (dir == NEUTRAL_DIR)  /* P2 */
 696     {
 697       int ch;
 698       EMACS_INT ch_len, nchars;
 699       EMACS_INT pos, disp_pos = -1;
 700       bidi_type_t type;
 701
 702       if (!bidi_initialized)
 703         bidi_initialize ();
 704
 705       /* If we are inside a paragraph separator, we are just waiting
 706          for the separator to be exhausted; use the previous paragraph
 707          direction.  But don't do that if we have been just reseated,
 708          because we need to reinitialize below in that case.  */
 709       if (!bidi_it->first_elt
 710           && bidi_it->charpos < bidi_it->separator_limit)
 711         return;
 712
 713       /* If we are on a newline, get past it to where the next
 714          paragraph might start.  But don't do that at BEGV since then
 715          we are potentially in a new paragraph that doesn't yet
 716          exist.  */
 717       pos = bidi_it->charpos;
 718       if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
 719         {
 720           bytepos++;
 721           pos++;
 722         }
 723
 724       /* We are either at the beginning of a paragraph or in the
 725          middle of it.  Find where this paragraph starts.  */
 726       pstartbyte = bidi_find_paragraph_start (pos, bytepos);
 727       bidi_it->separator_limit = -1;
 728       bidi_it->new_paragraph = 0;
 729
 730       /* The following loop is run more than once only if NO_DEFAULT_P
 731          is non-zero.  */
 732       do {
 733         bytepos = pstartbyte;
 734         pos = BYTE_TO_CHAR (bytepos);
 735         ch = bidi_fetch_char (bytepos, pos, &disp_pos, bidi_it->frame_window_p,
 736                               &ch_len, &nchars);
 737         type = bidi_get_type (ch, NEUTRAL_DIR);
 738
 739         for (pos += nchars, bytepos += ch_len;
 740              /* NOTE: UAX#9 says to search only for L, AL, or R types
 741                 of characters, and ignore RLE, RLO, LRE, and LRO.
 742                 However, I'm not sure it makes sense to omit those 4;
 743                 should try with and without that to see the effect.  */
 744              (bidi_get_category (type) != STRONG)
 745                || (bidi_ignore_explicit_marks_for_paragraph_level
 746                    && (type == RLE || type == RLO
 747                        || type == LRE || type == LRO));
 748              type = bidi_get_type (ch, NEUTRAL_DIR))
 749           {
 750             if (bytepos >= ZV_BYTE)
 751               {
 752                 /* Pretend there's a paragraph separator at end of
 753                    buffer.  */
 754                 type = NEUTRAL_B;
 755                 break;
 756               }
 757             if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
 758               break;
 759             /* Fetch next character and advance to get past it.  */
 760             ch = bidi_fetch_char (bytepos, pos, &disp_pos,
 761                                   bidi_it->frame_window_p, &ch_len, &nchars);
 762             pos += nchars;
 763             bytepos += ch_len;
 764           }
 765         if (type == STRONG_R || type == STRONG_AL) /* P3 */
 766           bidi_it->paragraph_dir = R2L;
 767         else if (type == STRONG_L)
 768           bidi_it->paragraph_dir = L2R;
 769         if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
 770           {
 771             /* If this paragraph is at BEGV, default to L2R.  */
 772             if (pstartbyte == BEGV_BYTE)
 773               bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
 774             else
 775               {
 776                 EMACS_INT prevpbyte = pstartbyte;
 777                 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
 778
 779                 /* Find the beginning of the previous paragraph, if any.  */
 780                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
 781                   {
 782                     /* FXIME: What if p is covered by a display
 783                        string?  See also a FIXME inside
 784                        bidi_find_paragraph_start.  */
 785                     p--;
 786                     pbyte = CHAR_TO_BYTE (p);
 787                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
 788                   }
 789                 pstartbyte = prevpbyte;
 790               }
 791           }
 792       } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
 793     }
 794   else
 795     abort ();
 796
 797   /* Contrary to UAX#9 clause P3, we only default the paragraph
 798      direction to L2R if we have no previous usable paragraph
 799      direction.  This is allowed by the HL1 clause.  */
 800   if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
 801     bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
 802   if (bidi_it->paragraph_dir == R2L)
 803     bidi_it->level_stack[0].level = 1;
 804   else
 805     bidi_it->level_stack[0].level = 0;
 806
 807   bidi_line_init (bidi_it);
 808 }
 809
 810 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
 811    end.  */
 812 static inline void
 813 bidi_set_paragraph_end (struct bidi_it *bidi_it)
 814 {
 815   bidi_it->invalid_levels = 0;
 816   bidi_it->invalid_rl_levels = -1;
 817   bidi_it->stack_idx = 0;
 818   bidi_it->resolved_level = bidi_it->level_stack[0].level;
 819 }
 820
 821 /* Initialize the bidi iterator from buffer/string position CHARPOS.  */
 822 void
 823 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
 824               struct bidi_it *bidi_it)
 825 {
 826   if (! bidi_initialized)
 827     bidi_initialize ();
 828   bidi_it->charpos = charpos;
 829   bidi_it->bytepos = bytepos;
 830   bidi_it->frame_window_p = frame_window_p;
 831   bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
 832   bidi_it->first_elt = 1;
 833   bidi_set_paragraph_end (bidi_it);
 834   bidi_it->new_paragraph = 1;
 835   bidi_it->separator_limit = -1;
 836   bidi_it->type = NEUTRAL_B;
 837   bidi_it->type_after_w1 = NEUTRAL_B;
 838   bidi_it->orig_type = NEUTRAL_B;
 839   bidi_it->prev_was_pdf = 0;
 840   bidi_it->prev.type = bidi_it->prev.type_after_w1 =
 841     bidi_it->prev.orig_type = UNKNOWN_BT;
 842   bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
 843     bidi_it->last_strong.orig_type = UNKNOWN_BT;
 844   bidi_it->next_for_neutral.charpos = -1;
 845   bidi_it->next_for_neutral.type =
 846     bidi_it->next_for_neutral.type_after_w1 =
 847     bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
 848   bidi_it->prev_for_neutral.charpos = -1;
 849   bidi_it->prev_for_neutral.type =
 850     bidi_it->prev_for_neutral.type_after_w1 =
 851     bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
 852   bidi_it->sor = L2R;    /* FIXME: should it be user-selectable? */
 853   bidi_it->disp_pos = -1;       /* invalid/unknown */
 854   bidi_cache_shrink ();
 855 }
 856
 857 /* Push the current embedding level and override status; reset the
 858    current level to LEVEL and the current override status to OVERRIDE.  */
 859 static inline void
 860 bidi_push_embedding_level (struct bidi_it *bidi_it,
 861                            int level, bidi_dir_t override)
 862 {
 863   bidi_it->stack_idx++;
 864   if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
 865     abort ();
 866   bidi_it->level_stack[bidi_it->stack_idx].level = level;
 867   bidi_it->level_stack[bidi_it->stack_idx].override = override;
 868 }
 869
 870 /* Pop the embedding level and directional override status from the
 871    stack, and return the new level.  */
 872 static inline int
 873 bidi_pop_embedding_level (struct bidi_it *bidi_it)
 874 {
 875   /* UAX#9 says to ignore invalid PDFs.  */
 876   if (bidi_it->stack_idx > 0)
 877     bidi_it->stack_idx--;
 878   return bidi_it->level_stack[bidi_it->stack_idx].level;
 879 }
 880
 881 /* Record in SAVED_INFO the information about the current character.  */
 882 static inline void
 883 bidi_remember_char (struct bidi_saved_info *saved_info,
 884                     struct bidi_it *bidi_it)
 885 {
 886   saved_info->charpos = bidi_it->charpos;
 887   saved_info->bytepos = bidi_it->bytepos;
 888   saved_info->type = bidi_it->type;
 889   bidi_check_type (bidi_it->type);
 890   saved_info->type_after_w1 = bidi_it->type_after_w1;
 891   bidi_check_type (bidi_it->type_after_w1);
 892   saved_info->orig_type = bidi_it->orig_type;
 893   bidi_check_type (bidi_it->orig_type);
 894 }
 895
 896 /* Resolve the type of a neutral character according to the type of
 897    surrounding strong text and the current embedding level.  */
 898 static inline bidi_type_t
 899 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
 900 {
 901   /* N1: European and Arabic numbers are treated as though they were R.  */
 902   if (next_type == WEAK_EN || next_type == WEAK_AN)
 903     next_type = STRONG_R;
 904   if (prev_type == WEAK_EN || prev_type == WEAK_AN)
 905     prev_type = STRONG_R;
 906
 907   if (next_type == prev_type)   /* N1 */
 908     return next_type;
 909   else if ((lev & 1) == 0)      /* N2 */
 910     return STRONG_L;
 911   else
 912     return STRONG_R;
 913 }
 914
 915 static inline int
 916 bidi_explicit_dir_char (int ch)
 917 {
 918   bidi_type_t ch_type;
 919
 920   if (!bidi_initialized)
 921     abort ();
 922   ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 923   return (ch_type == LRE || ch_type == LRO
 924           || ch_type == RLE || ch_type == RLO
 925           || ch_type == PDF);
 926 }
 927
 928 /* A helper function for bidi_resolve_explicit.  It advances to the
 929    next character in logical order and determines the new embedding
 930    level and directional override, but does not take into account
 931    empty embeddings.  */
 932 static int
 933 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
 934 {
 935   int curchar;
 936   bidi_type_t type;
 937   int current_level;
 938   int new_level;
 939   bidi_dir_t override;
 940
 941   /* If reseat()'ed, don't advance, so as to start iteration from the
 942      position where we were reseated.  bidi_it->bytepos can be less
 943      than BEGV_BYTE after reseat to BEGV.  */
 944   if (bidi_it->bytepos < BEGV_BYTE
 945       || bidi_it->first_elt)
 946     {
 947       bidi_it->first_elt = 0;
 948       if (bidi_it->charpos < BEGV)
 949         bidi_it->charpos = BEGV;
 950       bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
 951     }
 952   else if (bidi_it->bytepos < ZV_BYTE)  /* don't move at ZV */
 953     {
 954       /* Advance to the next character, skipping characters covered by
 955          display strings (nchars > 1).  */
 956       if (bidi_it->nchars <= 0)
 957         abort ();
 958       bidi_it->charpos += bidi_it->nchars;
 959       if (bidi_it->ch_len == 0)
 960         abort ();
 961       bidi_it->bytepos += bidi_it->ch_len;
 962     }
 963
 964   current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
 965   override = bidi_it->level_stack[bidi_it->stack_idx].override;
 966   new_level = current_level;
 967
 968   if (bidi_it->bytepos >= ZV_BYTE)
 969     {
 970       curchar = BIDI_EOB;
 971       bidi_it->ch_len = 1;
 972       bidi_it->nchars = 1;
 973       bidi_it->disp_pos = ZV;
 974     }
 975   else
 976     {
 977       /* Fetch the character at BYTEPOS.  If it is covered by a
 978          display string, treat the entire run of covered characters as
 979          a single character u+FFFC.  */
 980       curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
 981                                  &bidi_it->disp_pos, bidi_it->frame_window_p,
 982                                  &bidi_it->ch_len, &bidi_it->nchars);
 983     }
 984   bidi_it->ch = curchar;
 985
 986   /* Don't apply directional override here, as all the types we handle
 987      below will not be affected by the override anyway, and we need
 988      the original type unaltered.  The override will be applied in
 989      bidi_resolve_weak.  */
 990   type = bidi_get_type (curchar, NEUTRAL_DIR);
 991   bidi_it->orig_type = type;
 992   bidi_check_type (bidi_it->orig_type);
 993
 994   if (type != PDF)
 995     bidi_it->prev_was_pdf = 0;
 996
 997   bidi_it->type_after_w1 = UNKNOWN_BT;
 998
 999   switch (type)
1000     {
1001       case RLE: /* X2 */
1002       case RLO: /* X4 */
1003         bidi_it->type_after_w1 = type;
1004         bidi_check_type (bidi_it->type_after_w1);
1005         type = WEAK_BN; /* X9/Retaining */
1006         if (bidi_it->ignore_bn_limit <= 0)
1007           {
1008             if (current_level <= BIDI_MAXLEVEL - 4)
1009               {
1010                 /* Compute the least odd embedding level greater than
1011                    the current level.  */
1012                 new_level = ((current_level + 1) & ~1) + 1;
1013                 if (bidi_it->type_after_w1 == RLE)
1014                   override = NEUTRAL_DIR;
1015                 else
1016                   override = R2L;
1017                 if (current_level == BIDI_MAXLEVEL - 4)
1018                   bidi_it->invalid_rl_levels = 0;
1019                 bidi_push_embedding_level (bidi_it, new_level, override);
1020               }
1021             else
1022               {
1023                 bidi_it->invalid_levels++;
1024                 /* See the commentary about invalid_rl_levels below.  */
1025                 if (bidi_it->invalid_rl_levels < 0)
1026                   bidi_it->invalid_rl_levels = 0;
1027                 bidi_it->invalid_rl_levels++;
1028               }
1029           }
1030         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1031                  || bidi_it->next_en_pos > bidi_it->charpos)
1032           type = WEAK_EN;
1033         break;
1034       case LRE: /* X3 */
1035       case LRO: /* X5 */
1036         bidi_it->type_after_w1 = type;
1037         bidi_check_type (bidi_it->type_after_w1);
1038         type = WEAK_BN; /* X9/Retaining */
1039         if (bidi_it->ignore_bn_limit <= 0)
1040           {
1041             if (current_level <= BIDI_MAXLEVEL - 5)
1042               {
1043                 /* Compute the least even embedding level greater than
1044                    the current level.  */
1045                 new_level = ((current_level + 2) & ~1);
1046                 if (bidi_it->type_after_w1 == LRE)
1047                   override = NEUTRAL_DIR;
1048                 else
1049                   override = L2R;
1050                 bidi_push_embedding_level (bidi_it, new_level, override);
1051               }
1052             else
1053               {
1054                 bidi_it->invalid_levels++;
1055                 /* invalid_rl_levels counts invalid levels encountered
1056                    while the embedding level was already too high for
1057                    LRE/LRO, but not for RLE/RLO.  That is because
1058                    there may be exactly one PDF which we should not
1059                    ignore even though invalid_levels is non-zero.
1060                    invalid_rl_levels helps to know what PDF is
1061                    that.  */
1062                 if (bidi_it->invalid_rl_levels >= 0)
1063                   bidi_it->invalid_rl_levels++;
1064               }
1065           }
1066         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1067                  || bidi_it->next_en_pos > bidi_it->charpos)
1068           type = WEAK_EN;
1069         break;
1070       case PDF: /* X7 */
1071         bidi_it->type_after_w1 = type;
1072         bidi_check_type (bidi_it->type_after_w1);
1073         type = WEAK_BN; /* X9/Retaining */
1074         if (bidi_it->ignore_bn_limit <= 0)
1075           {
1076             if (!bidi_it->invalid_rl_levels)
1077               {
1078                 new_level = bidi_pop_embedding_level (bidi_it);
1079                 bidi_it->invalid_rl_levels = -1;
1080                 if (bidi_it->invalid_levels)
1081                   bidi_it->invalid_levels--;
1082                 /* else nothing: UAX#9 says to ignore invalid PDFs */
1083               }
1084             if (!bidi_it->invalid_levels)
1085               new_level = bidi_pop_embedding_level (bidi_it);
1086             else
1087               {
1088                 bidi_it->invalid_levels--;
1089                 bidi_it->invalid_rl_levels--;
1090               }
1091           }
1092         else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1093                  || bidi_it->next_en_pos > bidi_it->charpos)
1094           type = WEAK_EN;
1095         break;
1096       default:
1097         /* Nothing.  */
1098         break;
1099     }
1100
1101   bidi_it->type = type;
1102   bidi_check_type (bidi_it->type);
1103
1104   return new_level;
1105 }
1106
1107 /* Given an iterator state in BIDI_IT, advance one character position
1108    in the buffer/string to the next character (in the logical order),
1109    resolve any explicit embeddings and directional overrides, and
1110    return the embedding level of the character after resolving
1111    explicit directives and ignoring empty embeddings.  */
1112 static int
1113 bidi_resolve_explicit (struct bidi_it *bidi_it)
1114 {
1115   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1116   int new_level  = bidi_resolve_explicit_1 (bidi_it);
1117
1118   if (prev_level < new_level
1119       && bidi_it->type == WEAK_BN
1120       && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1121       && bidi_it->bytepos < ZV_BYTE    /* not already at EOB */
1122       && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1123                                                        + bidi_it->ch_len)))
1124     {
1125       /* Avoid pushing and popping embedding levels if the level run
1126          is empty, as this breaks level runs where it shouldn't.
1127          UAX#9 removes all the explicit embedding and override codes,
1128          so empty embeddings disappear without a trace.  We need to
1129          behave as if we did the same.  */
1130       struct bidi_it saved_it;
1131       int level = prev_level;
1132
1133       bidi_copy_it (&saved_it, bidi_it);
1134
1135       while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1136                                                            + bidi_it->ch_len)))
1137         {
1138           /* This advances to the next character, skipping any
1139              characters covered by display strings.  */
1140           level = bidi_resolve_explicit_1 (bidi_it);
1141         }
1142
1143       if (bidi_it->nchars <= 0)
1144         abort ();
1145       if (level == prev_level)  /* empty embedding */
1146         saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1147       else                      /* this embedding is non-empty */
1148         saved_it.ignore_bn_limit = -1;
1149
1150       bidi_copy_it (bidi_it, &saved_it);
1151       if (bidi_it->ignore_bn_limit > 0)
1152         {
1153           /* We pushed a level, but we shouldn't have.  Undo that. */
1154           if (!bidi_it->invalid_rl_levels)
1155             {
1156               new_level = bidi_pop_embedding_level (bidi_it);
1157               bidi_it->invalid_rl_levels = -1;
1158               if (bidi_it->invalid_levels)
1159                 bidi_it->invalid_levels--;
1160             }
1161           if (!bidi_it->invalid_levels)
1162             new_level = bidi_pop_embedding_level (bidi_it);
1163           else
1164             {
1165               bidi_it->invalid_levels--;
1166               bidi_it->invalid_rl_levels--;
1167             }
1168         }
1169     }
1170
1171   if (bidi_it->type == NEUTRAL_B)       /* X8 */
1172     {
1173       bidi_set_paragraph_end (bidi_it);
1174       /* This is needed by bidi_resolve_weak below, and in L1.  */
1175       bidi_it->type_after_w1 = bidi_it->type;
1176       bidi_check_type (bidi_it->type_after_w1);
1177     }
1178
1179   return new_level;
1180 }
1181
1182 /* Advance in the buffer/string, resolve weak types and return the
1183    type of the next character after weak type resolution.  */
1184 static bidi_type_t
1185 bidi_resolve_weak (struct bidi_it *bidi_it)
1186 {
1187   bidi_type_t type;
1188   bidi_dir_t override;
1189   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1190   int new_level  = bidi_resolve_explicit (bidi_it);
1191   int next_char;
1192   bidi_type_t type_of_next;
1193   struct bidi_it saved_it;
1194
1195   type = bidi_it->type;
1196   override = bidi_it->level_stack[bidi_it->stack_idx].override;
1197
1198   if (type == UNKNOWN_BT
1199       || type == LRE
1200       || type == LRO
1201       || type == RLE
1202       || type == RLO
1203       || type == PDF)
1204     abort ();
1205
1206   if (new_level != prev_level
1207       || bidi_it->type == NEUTRAL_B)
1208     {
1209       /* We've got a new embedding level run, compute the directional
1210          type of sor and initialize per-run variables (UAX#9, clause
1211          X10).  */
1212       bidi_set_sor_type (bidi_it, prev_level, new_level);
1213     }
1214   else if (type == NEUTRAL_S || type == NEUTRAL_WS
1215            || type == WEAK_BN || type == STRONG_AL)
1216     bidi_it->type_after_w1 = type;      /* needed in L1 */
1217   bidi_check_type (bidi_it->type_after_w1);
1218
1219   /* Level and directional override status are already recorded in
1220      bidi_it, and do not need any change; see X6.  */
1221   if (override == R2L)          /* X6 */
1222     type = STRONG_R;
1223   else if (override == L2R)
1224     type = STRONG_L;
1225   else
1226     {
1227       if (type == WEAK_NSM)     /* W1 */
1228         {
1229           /* Note that we don't need to consider the case where the
1230              prev character has its type overridden by an RLO or LRO,
1231              because then either the type of this NSM would have been
1232              also overridden, or the previous character is outside the
1233              current level run, and thus not relevant to this NSM.
1234              This is why NSM gets the type_after_w1 of the previous
1235              character.  */
1236           if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1237               /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1238               && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1239             type = bidi_it->prev.type_after_w1;
1240           else if (bidi_it->sor == R2L)
1241             type = STRONG_R;
1242           else if (bidi_it->sor == L2R)
1243             type = STRONG_L;
1244           else /* shouldn't happen! */
1245             abort ();
1246         }
1247       if (type == WEAK_EN       /* W2 */
1248           && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1249         type = WEAK_AN;
1250       else if (type == STRONG_AL) /* W3 */
1251         type = STRONG_R;
1252       else if ((type == WEAK_ES /* W4 */
1253                 && bidi_it->prev.type_after_w1 == WEAK_EN
1254                 && bidi_it->prev.orig_type == WEAK_EN)
1255                || (type == WEAK_CS
1256                    && ((bidi_it->prev.type_after_w1 == WEAK_EN
1257                         && bidi_it->prev.orig_type == WEAK_EN)
1258                        || bidi_it->prev.type_after_w1 == WEAK_AN)))
1259         {
1260           next_char =
1261             bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1262             ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1263                                                + bidi_it->ch_len);
1264           type_of_next = bidi_get_type (next_char, override);
1265
1266           if (type_of_next == WEAK_BN
1267               || bidi_explicit_dir_char (next_char))
1268             {
1269               bidi_copy_it (&saved_it, bidi_it);
1270               while (bidi_resolve_explicit (bidi_it) == new_level
1271                      && bidi_it->type == WEAK_BN)
1272                 ;
1273               type_of_next = bidi_it->type;
1274               bidi_copy_it (bidi_it, &saved_it);
1275             }
1276
1277           /* If the next character is EN, but the last strong-type
1278              character is AL, that next EN will be changed to AN when
1279              we process it in W2 above.  So in that case, this ES
1280              should not be changed into EN.  */
1281           if (type == WEAK_ES
1282               && type_of_next == WEAK_EN
1283               && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1284             type = WEAK_EN;
1285           else if (type == WEAK_CS)
1286             {
1287               if (bidi_it->prev.type_after_w1 == WEAK_AN
1288                   && (type_of_next == WEAK_AN
1289                       /* If the next character is EN, but the last
1290                          strong-type character is AL, EN will be later
1291                          changed to AN when we process it in W2 above.
1292                          So in that case, this ES should not be
1293                          changed into EN.  */
1294                       || (type_of_next == WEAK_EN
1295                           && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1296                 type = WEAK_AN;
1297               else if (bidi_it->prev.type_after_w1 == WEAK_EN
1298                        && type_of_next == WEAK_EN
1299                        && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1300                 type = WEAK_EN;
1301             }
1302         }
1303       else if (type == WEAK_ET  /* W5: ET with EN before or after it */
1304                || type == WEAK_BN)      /* W5/Retaining */
1305         {
1306           if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1307               || bidi_it->next_en_pos > bidi_it->charpos)
1308             type = WEAK_EN;
1309           else                  /* W5: ET/BN with EN after it.  */
1310             {
1311               EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1312
1313               if (bidi_it->nchars <= 0)
1314                 abort ();
1315               next_char =
1316                 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1317                 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1318                                                    + bidi_it->ch_len);
1319               type_of_next = bidi_get_type (next_char, override);
1320
1321               if (type_of_next == WEAK_ET
1322                   || type_of_next == WEAK_BN
1323                   || bidi_explicit_dir_char (next_char))
1324                 {
1325                   bidi_copy_it (&saved_it, bidi_it);
1326                   while (bidi_resolve_explicit (bidi_it) == new_level
1327                          && (bidi_it->type == WEAK_BN
1328                              || bidi_it->type == WEAK_ET))
1329                     ;
1330                   type_of_next = bidi_it->type;
1331                   en_pos = bidi_it->charpos;
1332                   bidi_copy_it (bidi_it, &saved_it);
1333                 }
1334               if (type_of_next == WEAK_EN)
1335                 {
1336                   /* If the last strong character is AL, the EN we've
1337                      found will become AN when we get to it (W2). */
1338                   if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1339                     {
1340                       type = WEAK_EN;
1341                       /* Remember this EN position, to speed up processing
1342                          of the next ETs.  */
1343                       bidi_it->next_en_pos = en_pos;
1344                     }
1345                   else if (type == WEAK_BN)
1346                     type = NEUTRAL_ON; /* W6/Retaining */
1347                 }
1348             }
1349         }
1350     }
1351
1352   if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1353       || (type == WEAK_BN
1354           && (bidi_it->prev.type_after_w1 == WEAK_CS        /* W6/Retaining */
1355               || bidi_it->prev.type_after_w1 == WEAK_ES
1356               || bidi_it->prev.type_after_w1 == WEAK_ET)))
1357     type = NEUTRAL_ON;
1358
1359   /* Store the type we've got so far, before we clobber it with strong
1360      types in W7 and while resolving neutral types.  But leave alone
1361      the original types that were recorded above, because we will need
1362      them for the L1 clause.  */
1363   if (bidi_it->type_after_w1 == UNKNOWN_BT)
1364     bidi_it->type_after_w1 = type;
1365   bidi_check_type (bidi_it->type_after_w1);
1366
1367   if (type == WEAK_EN)  /* W7 */
1368     {
1369       if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1370           || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1371         type = STRONG_L;
1372     }
1373
1374   bidi_it->type = type;
1375   bidi_check_type (bidi_it->type);
1376   return type;
1377 }
1378
1379 static bidi_type_t
1380 bidi_resolve_neutral (struct bidi_it *bidi_it)
1381 {
1382   int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1383   bidi_type_t type = bidi_resolve_weak (bidi_it);
1384   int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1385
1386   if (!(type == STRONG_R
1387         || type == STRONG_L
1388         || type == WEAK_BN
1389         || type == WEAK_EN
1390         || type == WEAK_AN
1391         || type == NEUTRAL_B
1392         || type == NEUTRAL_S
1393         || type == NEUTRAL_WS
1394         || type == NEUTRAL_ON))
1395     abort ();
1396
1397   if (bidi_get_category (type) == NEUTRAL
1398       || (type == WEAK_BN && prev_level == current_level))
1399     {
1400       if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1401         type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1402                                        bidi_it->next_for_neutral.type,
1403                                        current_level);
1404       else
1405         {
1406           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
1407              the assumption of batch-style processing; see clauses W4,
1408              W5, and especially N1, which require to look far forward
1409              (as well as back) in the buffer/string.  May the fleas of
1410              a thousand camels infest the armpits of those who design
1411              supposedly general-purpose algorithms by looking at their
1412              own implementations, and fail to consider other possible
1413              implementations!  */
1414           struct bidi_it saved_it;
1415           bidi_type_t next_type;
1416
1417           if (bidi_it->scan_dir == -1)
1418             abort ();
1419
1420           bidi_copy_it (&saved_it, bidi_it);
1421           /* Scan the text forward until we find the first non-neutral
1422              character, and then use that to resolve the neutral we
1423              are dealing with now.  We also cache the scanned iterator
1424              states, to salvage some of the effort later.  */
1425           bidi_cache_iterator_state (bidi_it, 0);
1426           do {
1427             /* Record the info about the previous character, so that
1428                it will be cached below with this state.  */
1429             if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1430                 && bidi_it->type != WEAK_BN)
1431               bidi_remember_char (&bidi_it->prev, bidi_it);
1432             type = bidi_resolve_weak (bidi_it);
1433             /* Paragraph separators have their levels fully resolved
1434                at this point, so cache them as resolved.  */
1435             bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1436             /* FIXME: implement L1 here, by testing for a newline and
1437                resetting the level for any sequence of whitespace
1438                characters adjacent to it.  */
1439           } while (!(type == NEUTRAL_B
1440                      || (type != WEAK_BN
1441                          && bidi_get_category (type) != NEUTRAL)
1442                      /* This is all per level run, so stop when we
1443                         reach the end of this level run.  */
1444                      || bidi_it->level_stack[bidi_it->stack_idx].level !=
1445                      current_level));
1446
1447           bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1448
1449           switch (type)
1450             {
1451               case STRONG_L:
1452               case STRONG_R:
1453               case STRONG_AL:
1454                 next_type = type;
1455                 break;
1456               case WEAK_EN:
1457               case WEAK_AN:
1458                 /* N1: ``European and Arabic numbers are treated as
1459                    though they were R.''  */
1460                 next_type = STRONG_R;
1461                 saved_it.next_for_neutral.type = STRONG_R;
1462                 break;
1463               case WEAK_BN:
1464                 if (!bidi_explicit_dir_char (bidi_it->ch))
1465                   abort ();             /* can't happen: BNs are skipped */
1466                 /* FALLTHROUGH */
1467               case NEUTRAL_B:
1468                 /* Marched all the way to the end of this level run.
1469                    We need to use the eor type, whose information is
1470                    stored by bidi_set_sor_type in the prev_for_neutral
1471                    member.  */
1472                 if (saved_it.type != WEAK_BN
1473                     || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1474                   {
1475                     next_type = bidi_it->prev_for_neutral.type;
1476                     saved_it.next_for_neutral.type = next_type;
1477                     bidi_check_type (next_type);
1478                   }
1479                 else
1480                   {
1481                     /* This is a BN which does not adjoin neutrals.
1482                        Leave its type alone.  */
1483                     bidi_copy_it (bidi_it, &saved_it);
1484                     return bidi_it->type;
1485                   }
1486                 break;
1487               default:
1488                 abort ();
1489             }
1490           type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1491                                          next_type, current_level);
1492           saved_it.type = type;
1493           bidi_check_type (type);
1494           bidi_copy_it (bidi_it, &saved_it);
1495         }
1496     }
1497   return type;
1498 }
1499
1500 /* Given an iterator state in BIDI_IT, advance one character position
1501    in the buffer/string to the next character (in the logical order),
1502    resolve the bidi type of that next character, and return that
1503    type.  */
1504 static bidi_type_t
1505 bidi_type_of_next_char (struct bidi_it *bidi_it)
1506 {
1507   bidi_type_t type;
1508
1509   /* This should always be called during a forward scan.  */
1510   if (bidi_it->scan_dir != 1)
1511     abort ();
1512
1513   /* Reset the limit until which to ignore BNs if we step out of the
1514      area where we found only empty levels.  */
1515   if ((bidi_it->ignore_bn_limit > 0
1516        && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1517       || (bidi_it->ignore_bn_limit == -1
1518           && !bidi_explicit_dir_char (bidi_it->ch)))
1519     bidi_it->ignore_bn_limit = 0;
1520
1521   type = bidi_resolve_neutral (bidi_it);
1522
1523   return type;
1524 }
1525
1526 /* Given an iterator state BIDI_IT, advance one character position in
1527    the buffer/string to the next character (in the current scan
1528    direction), resolve the embedding and implicit levels of that next
1529    character, and return the resulting level.  */
1530 static int
1531 bidi_level_of_next_char (struct bidi_it *bidi_it)
1532 {
1533   bidi_type_t type;
1534   int level, prev_level = -1;
1535   struct bidi_saved_info next_for_neutral;
1536   EMACS_INT next_char_pos;
1537
1538   if (bidi_it->scan_dir == 1)
1539     {
1540       /* There's no sense in trying to advance if we hit end of text.  */
1541       if (bidi_it->bytepos >= ZV_BYTE)
1542         return bidi_it->resolved_level;
1543
1544       /* Record the info about the previous character.  */
1545       if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1546           && bidi_it->type != WEAK_BN)
1547         bidi_remember_char (&bidi_it->prev, bidi_it);
1548       if (bidi_it->type_after_w1 == STRONG_R
1549           || bidi_it->type_after_w1 == STRONG_L
1550           || bidi_it->type_after_w1 == STRONG_AL)
1551         bidi_remember_char (&bidi_it->last_strong, bidi_it);
1552       /* FIXME: it sounds like we don't need both prev and
1553          prev_for_neutral members, but I'm leaving them both for now.  */
1554       if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1555           || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1556         bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1557
1558       /* If we overstepped the characters used for resolving neutrals
1559          and whitespace, invalidate their info in the iterator.  */
1560       if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1561         bidi_it->next_for_neutral.type = UNKNOWN_BT;
1562       if (bidi_it->next_en_pos >= 0
1563           && bidi_it->charpos >= bidi_it->next_en_pos)
1564         bidi_it->next_en_pos = -1;
1565       if (bidi_it->next_for_ws.type != UNKNOWN_BT
1566           && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1567         bidi_it->next_for_ws.type = UNKNOWN_BT;
1568
1569       /* This must be taken before we fill the iterator with the info
1570          about the next char.  If we scan backwards, the iterator
1571          state must be already cached, so there's no need to know the
1572          embedding level of the previous character, since we will be
1573          returning to our caller shortly.  */
1574       prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1575     }
1576   next_for_neutral = bidi_it->next_for_neutral;
1577
1578   /* Perhaps the character we want is already cached.  If it is, the
1579      call to bidi_cache_find below will return a type other than
1580      UNKNOWN_BT.  */
1581   if (bidi_cache_idx && !bidi_it->first_elt)
1582     {
1583       if (bidi_it->scan_dir > 0)
1584         {
1585           if (bidi_it->nchars <= 0)
1586             abort ();
1587           next_char_pos = bidi_it->charpos + bidi_it->nchars;
1588         }
1589       else
1590         next_char_pos = bidi_it->charpos - 1;
1591       type = bidi_cache_find (next_char_pos, -1, bidi_it);
1592     }
1593   else
1594     type = UNKNOWN_BT;
1595   if (type != UNKNOWN_BT)
1596     {
1597       /* Don't lose the information for resolving neutrals!  The
1598          cached states could have been cached before their
1599          next_for_neutral member was computed.  If we are on our way
1600          forward, we can simply take the info from the previous
1601          state.  */
1602       if (bidi_it->scan_dir == 1
1603           && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1604         bidi_it->next_for_neutral = next_for_neutral;
1605
1606       /* If resolved_level is -1, it means this state was cached
1607          before it was completely resolved, so we cannot return
1608          it.  */
1609       if (bidi_it->resolved_level != -1)
1610         return bidi_it->resolved_level;
1611     }
1612   if (bidi_it->scan_dir == -1)
1613     /* If we are going backwards, the iterator state is already cached
1614        from previous scans, and should be fully resolved.  */
1615     abort ();
1616
1617   if (type == UNKNOWN_BT)
1618     type = bidi_type_of_next_char (bidi_it);
1619
1620   if (type == NEUTRAL_B)
1621     return bidi_it->resolved_level;
1622
1623   level = bidi_it->level_stack[bidi_it->stack_idx].level;
1624   if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1625       || (type == WEAK_BN && prev_level == level))
1626     {
1627       if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1628         abort ();
1629
1630       /* If the cached state shows a neutral character, it was not
1631          resolved by bidi_resolve_neutral, so do it now.  */
1632       type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1633                                      bidi_it->next_for_neutral.type,
1634                                      level);
1635     }
1636
1637   if (!(type == STRONG_R
1638         || type == STRONG_L
1639         || type == WEAK_BN
1640         || type == WEAK_EN
1641         || type == WEAK_AN))
1642     abort ();
1643   bidi_it->type = type;
1644   bidi_check_type (bidi_it->type);
1645
1646   /* For L1 below, we need to know, for each WS character, whether
1647      it belongs to a sequence of WS characters preceding a newline
1648      or a TAB or a paragraph separator.  */
1649   if (bidi_it->orig_type == NEUTRAL_WS
1650       && bidi_it->next_for_ws.type == UNKNOWN_BT)
1651     {
1652       int ch;
1653       EMACS_INT clen = bidi_it->ch_len;
1654       EMACS_INT bpos = bidi_it->bytepos;
1655       EMACS_INT cpos = bidi_it->charpos;
1656       EMACS_INT disp_pos = bidi_it->disp_pos;
1657       EMACS_INT nc = bidi_it->nchars;
1658       bidi_type_t chtype;
1659       int fwp = bidi_it->frame_window_p;
1660
1661       if (bidi_it->nchars <= 0)
1662         abort ();
1663       do {
1664         ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, fwp,
1665                               &clen, &nc);
1666         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1667           chtype = NEUTRAL_B;
1668         else
1669           chtype = bidi_get_type (ch, NEUTRAL_DIR);
1670       } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1671                || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1672       bidi_it->next_for_ws.type = chtype;
1673       bidi_check_type (bidi_it->next_for_ws.type);
1674       bidi_it->next_for_ws.charpos = cpos;
1675       bidi_it->next_for_ws.bytepos = bpos;
1676     }
1677
1678   /* Resolve implicit levels, with a twist: PDFs get the embedding
1679      level of the enbedding they terminate.  See below for the
1680      reason.  */
1681   if (bidi_it->orig_type == PDF
1682       /* Don't do this if this formatting code didn't change the
1683          embedding level due to invalid or empty embeddings.  */
1684       && prev_level != level)
1685     {
1686       /* Don't look in UAX#9 for the reason for this: it's our own
1687          private quirk.  The reason is that we want the formatting
1688          codes to be delivered so that they bracket the text of their
1689          embedding.  For example, given the text
1690
1691              {RLO}teST{PDF}
1692
1693          we want it to be displayed as
1694
1695              {PDF}STet{RLO}
1696
1697          not as
1698
1699              STet{RLO}{PDF}
1700
1701          which will result because we bump up the embedding level as
1702          soon as we see the RLO and pop it as soon as we see the PDF,
1703          so RLO itself has the same embedding level as "teST", and
1704          thus would be normally delivered last, just before the PDF.
1705          The switch below fiddles with the level of PDF so that this
1706          ugly side effect does not happen.
1707
1708          (This is, of course, only important if the formatting codes
1709          are actually displayed, but Emacs does need to display them
1710          if the user wants to.)  */
1711       level = prev_level;
1712     }
1713   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1714            || bidi_it->orig_type == NEUTRAL_S
1715            || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1716            /* || bidi_it->ch == LINESEP_CHAR */
1717            || (bidi_it->orig_type == NEUTRAL_WS
1718                && (bidi_it->next_for_ws.type == NEUTRAL_B
1719                    || bidi_it->next_for_ws.type == NEUTRAL_S)))
1720     level = bidi_it->level_stack[0].level;
1721   else if ((level & 1) == 0) /* I1 */
1722     {
1723       if (type == STRONG_R)
1724         level++;
1725       else if (type == WEAK_EN || type == WEAK_AN)
1726         level += 2;
1727     }
1728   else                  /* I2 */
1729     {
1730       if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1731         level++;
1732     }
1733
1734   bidi_it->resolved_level = level;
1735   return level;
1736 }
1737
1738 /* Move to the other edge of a level given by LEVEL.  If END_FLAG is
1739    non-zero, we are at the end of a level, and we need to prepare to
1740    resume the scan of the lower level.
1741
1742    If this level's other edge is cached, we simply jump to it, filling
1743    the iterator structure with the iterator state on the other edge.
1744    Otherwise, we walk the buffer or string until we come back to the
1745    same level as LEVEL.
1746
1747    Note: we are not talking here about a ``level run'' in the UAX#9
1748    sense of the term, but rather about a ``level'' which includes
1749    all the levels higher than it.  In other words, given the levels
1750    like this:
1751
1752          11111112222222333333334443343222222111111112223322111
1753                 A      B                    C
1754
1755    and assuming we are at point A scanning left to right, this
1756    function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1757    at point B.  */
1758 static void
1759 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1760 {
1761   int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1762   ptrdiff_t idx;
1763
1764   /* Try the cache first.  */
1765   if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1766     bidi_cache_fetch_state (idx, bidi_it);
1767   else
1768     {
1769       int new_level;
1770
1771       if (end_flag)
1772         abort (); /* if we are at end of level, its edges must be cached */
1773
1774       bidi_cache_iterator_state (bidi_it, 1);
1775       do {
1776         new_level = bidi_level_of_next_char (bidi_it);
1777         bidi_cache_iterator_state (bidi_it, 1);
1778       } while (new_level >= level);
1779     }
1780 }
1781
1782 void
1783 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1784 {
1785   int old_level, new_level, next_level;
1786   struct bidi_it sentinel;
1787
1788   if (bidi_it->scan_dir == 0)
1789     {
1790       bidi_it->scan_dir = 1;    /* default to logical order */
1791     }
1792
1793   /* If we just passed a newline, initialize for the next line.  */
1794   if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1795     bidi_line_init (bidi_it);
1796
1797   /* Prepare the sentinel iterator state, and cache it.  When we bump
1798      into it, scanning backwards, we'll know that the last non-base
1799      level is exhausted.  */
1800   if (bidi_cache_idx == 0)
1801     {
1802       bidi_copy_it (&sentinel, bidi_it);
1803       if (bidi_it->first_elt)
1804         {
1805           sentinel.charpos--;   /* cached charpos needs to be monotonic */
1806           sentinel.bytepos--;
1807           sentinel.ch = '\n';   /* doesn't matter, but why not? */
1808           sentinel.ch_len = 1;
1809           sentinel.nchars = 1;
1810         }
1811       bidi_cache_iterator_state (&sentinel, 1);
1812     }
1813
1814   old_level = bidi_it->resolved_level;
1815   new_level = bidi_level_of_next_char (bidi_it);
1816
1817   /* Reordering of resolved levels (clause L2) is implemented by
1818      jumping to the other edge of the level and flipping direction of
1819      scanning the text whenever we find a level change.  */
1820   if (new_level != old_level)
1821     {
1822       int ascending = new_level > old_level;
1823       int level_to_search = ascending ? old_level + 1 : old_level;
1824       int incr = ascending ? 1 : -1;
1825       int expected_next_level = old_level + incr;
1826
1827       /* Jump (or walk) to the other edge of this level.  */
1828       bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1829       /* Switch scan direction and peek at the next character in the
1830          new direction.  */
1831       bidi_it->scan_dir = -bidi_it->scan_dir;
1832
1833       /* The following loop handles the case where the resolved level
1834          jumps by more than one.  This is typical for numbers inside a
1835          run of text with left-to-right embedding direction, but can
1836          also happen in other situations.  In those cases the decision
1837          where to continue after a level change, and in what direction,
1838          is tricky.  For example, given a text like below:
1839
1840                   abcdefgh
1841                   11336622
1842
1843          (where the numbers below the text show the resolved levels),
1844          the result of reordering according to UAX#9 should be this:
1845
1846                   efdcghba
1847
1848          This is implemented by the loop below which flips direction
1849          and jumps to the other edge of the level each time it finds
1850          the new level not to be the expected one.  The expected level
1851          is always one more or one less than the previous one.  */
1852       next_level = bidi_peek_at_next_level (bidi_it);
1853       while (next_level != expected_next_level)
1854         {
1855           expected_next_level += incr;
1856           level_to_search += incr;
1857           bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1858           bidi_it->scan_dir = -bidi_it->scan_dir;
1859           next_level = bidi_peek_at_next_level (bidi_it);
1860         }
1861
1862       /* Finally, deliver the next character in the new direction.  */
1863       next_level = bidi_level_of_next_char (bidi_it);
1864     }
1865
1866   /* Take note when we have just processed the newline that precedes
1867      the end of the paragraph.  The next time we are about to be
1868      called, set_iterator_to_next will automatically reinit the
1869      paragraph direction, if needed.  We do this at the newline before
1870      the paragraph separator, because the next character might not be
1871      the first character of the next paragraph, due to the bidi
1872      reordering, whereas we _must_ know the paragraph base direction
1873      _before_ we process the paragraph's text, since the base
1874      direction affects the reordering.  */
1875   if (bidi_it->scan_dir == 1
1876       && bidi_it->orig_type == NEUTRAL_B
1877       && bidi_it->bytepos < ZV_BYTE)
1878     {
1879       EMACS_INT sep_len =
1880         bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
1881                                bidi_it->bytepos + bidi_it->ch_len);
1882       if (bidi_it->nchars <= 0)
1883         abort ();
1884       if (sep_len >= 0)
1885         {
1886           bidi_it->new_paragraph = 1;
1887           /* Record the buffer position of the last character of the
1888              paragraph separator.  */
1889           bidi_it->separator_limit =
1890             bidi_it->charpos + bidi_it->nchars + sep_len;
1891         }
1892     }
1893
1894   if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1895     {
1896       /* If we are at paragraph's base embedding level and beyond the
1897          last cached position, the cache's job is done and we can
1898          discard it.  */
1899       if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1900           && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
1901                                  + bidi_cache[bidi_cache_idx - 1].nchars - 1))
1902         bidi_cache_reset ();
1903         /* But as long as we are caching during forward scan, we must
1904            cache each state, or else the cache integrity will be
1905            compromised: it assumes cached states correspond to buffer
1906            positions 1:1.  */
1907       else
1908         bidi_cache_iterator_state (bidi_it, 1);
1909     }
1910 }
1911
1912 /* This is meant to be called from within the debugger, whenever you
1913    wish to examine the cache contents.  */
1914 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
1915 void
1916 bidi_dump_cached_states (void)
1917 {
1918   ptrdiff_t i;
1919   int ndigits = 1;
1920
1921   if (bidi_cache_idx == 0)
1922     {
1923       fprintf (stderr, "The cache is empty.\n");
1924       return;
1925     }
1926   fprintf (stderr, "Total of %d state%s in cache:\n",
1927            bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1928
1929   for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1930     ndigits++;
1931   fputs ("ch  ", stderr);
1932   for (i = 0; i < bidi_cache_idx; i++)
1933     fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1934   fputs ("\n", stderr);
1935   fputs ("lvl ", stderr);
1936   for (i = 0; i < bidi_cache_idx; i++)
1937     fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1938   fputs ("\n", stderr);
1939   fputs ("pos ", stderr);
1940   for (i = 0; i < bidi_cache_idx; i++)
1941     fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
1942   fputs ("\n", stderr);
1943 }