Started work on reordering display strings. Refactor FETCH_CHAR.

author Eli Zaretskii <eliz@gnu.org>

Tue, 10 May 2011 16:12:16 +0000 (19:12 +0300)

committer Eli Zaretskii <eliz@gnu.org>

Tue, 10 May 2011 16:12:16 +0000 (19:12 +0300)
author Eli Zaretskii <eliz@gnu.org>
Tue, 10 May 2011 16:12:16 +0000 (19:12 +0300)
committer Eli Zaretskii <eliz@gnu.org>
Tue, 10 May 2011 16:12:16 +0000 (19:12 +0300)
diff --git a/src/ChangeLog b/src/ChangeLog

index 6f70908..03fe002 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,32 @@
+2011-05-10  Eli Zaretskii  <eliz@gnu.org>
+
+       * xdisp.c (compute_display_string_pos): New function.
+       (reseat_1): Initialize bidi_it.disp_pos.
+
+       * bidi.c (bidi_copy_it): Use offsetof.
+       (bidi_fetch_char, bidi_fetch_char_advance): New functions.
+       (bidi_cache_search, bidi_cache_iterator_state)
+       (bidi_paragraph_init, bidi_resolve_explicit, bidi_resolve_weak)
+       (bidi_level_of_next_char, bidi_move_to_visually_next): Support
+       character positions inside a run of characters covered by a
+       display string.
+       (bidi_paragraph_init, bidi_resolve_explicit_1)
+       (bidi_level_of_next_char): Call bidi_fetch_char and
+       bidi_fetch_char_advance instead of FETCH_CHAR and
+       FETCH_CHAR_ADVANCE.
+       (bidi_init_it): Initialize new members.
+       (LRE_CHAR, RLE_CHAR, PDF_CHAR, LRO_CHAR, RLO_CHAR): Remove macro
+       definitions.
+       (bidi_explicit_dir_char): Lookup character type in bidi_type_table,
+       instead of using explicit *_CHAR codes.
+       (bidi_resolve_explicit, bidi_resolve_weak): Use
+       FETCH_MULTIBYTE_CHAR instead of FETCH_CHAR, as reordering of
+       bidirectional text is supported only in multibyte buffers.
+
+       * dispextern.h (struct bidi_it): New members nchars and disp_pos.
+       ch_len is now EMACS_INT.
+       (compute_display_string_pos): Declare prototype.
+
  2011-05-09  Andreas Schwab  <schwab@linux-m68k.org>
  
         * w32menu.c (set_frame_menubar): Fix submenu allocation.
diff --git a/src/bidi.c b/src/bidi.c

index 88c45e2..0a7c92c 100644 (file)
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -62,15 +62,8 @@ static int bidi_initialized = 0;
  
  static Lisp_Object bidi_type_table, bidi_mirror_table;
  
-/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table.  */
  #define LRM_CHAR   0x200E
  #define RLM_CHAR   0x200F
-#define LRE_CHAR   0x202A
-#define RLE_CHAR   0x202B
-#define PDF_CHAR   0x202C
-#define LRO_CHAR   0x202D
-#define RLO_CHAR   0x202E
-
  #define BIDI_EOB   -1
  
  /* Local data structures.  (Look in dispextern.h for the rest.)  */
@@ -258,7 +251,7 @@ bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
    int i;
  
    /* Copy everything except the level stack and beyond.  */
-  memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
+  memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
  
    /* Copy the active part of the level stack.  */
    to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
@@ -319,10 +312,17 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
    if (bidi_cache_idx)
      {
        if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
-       dir = -1;
-      else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
-       dir = 1;
-      if (dir)
+       {
+         dir = -1;
+         i_start = bidi_cache_last_idx - 1;
+       }
+      else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
+                         + bidi_cache[bidi_cache_last_idx].nchars - 1))
+       {
+         dir = 1;
+         i_start = bidi_cache_last_idx + 1;
+       }
+      else if (dir)
         i_start = bidi_cache_last_idx;
        else
         {
@@ -334,14 +334,16 @@ bidi_cache_search (EMACS_INT charpos, int level, int dir)
         {
           /* Linear search for now; FIXME!  */
           for (i = i_start; i >= 0; i--)
-           if (bidi_cache[i].charpos == charpos
+           if (bidi_cache[i].charpos <= charpos
+               && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
                 && (level == -1 || bidi_cache[i].resolved_level <= level))
               return i;
         }
        else
         {
           for (i = i_start; i < bidi_cache_idx; i++)
-           if (bidi_cache[i].charpos == charpos
+           if (bidi_cache[i].charpos <= charpos
+               && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
                 && (level == -1 || bidi_cache[i].resolved_level <= level))
               return i;
         }
@@ -426,7 +428,8 @@ bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
          If we are outside the range of cached positions, the cache is
          useless and must be reset.  */
        if (idx > 0 &&
-         (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
+         (bidi_it->charpos > (bidi_cache[idx - 1].charpos
+                              + bidi_cache[idx - 1].nchars)
            || bidi_it->charpos < bidi_cache[0].charpos))
         {
           bidi_cache_reset ();
@@ -548,6 +551,7 @@ bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
    bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
  }
  
+/* Perform initializations for reordering a new line of bidi text.  */
  static void
  bidi_line_init (struct bidi_it *bidi_it)
  {
@@ -565,6 +569,77 @@ bidi_line_init (struct bidi_it *bidi_it)
    bidi_cache_reset ();
  }
  
+/* Fetch and return the character at BYTEPOS.  If that character is
+   covered by a display string, treat the entire run of covered
+   characters as a single character u+FFFC, and return their combined
+   length in CH_LEN and NCHARS.  DISP_POS specifies the byte position
+   of the next display string, or -1 if not yet computed.  When the
+   next character is at or beyond that position, the function updates
+   DISP_POS with the position of the next display string.  */
+static INLINE int
+bidi_fetch_char (EMACS_INT bytepos, EMACS_INT *disp_pos,
+                EMACS_INT *ch_len, EMACS_INT *nchars)
+{
+  int ch;
+
+  /* FIXME: Support strings in addition to buffers.  */
+  /* If we got past the last known position of display string, compute
+     the position of the next one.  That position could be at BYTEPOS.  */
+  if (bytepos < ZV_BYTE && bytepos > *disp_pos)
+    *disp_pos = compute_display_string_pos (bytepos);
+  if (bytepos >= ZV_BYTE)
+    {
+      ch = BIDI_EOB;
+      *ch_len = 1;
+      *nchars = 1;
+    }
+#if 0
+  else if (bytepos >= *disp_pos)
+    {
+      /* support characters covered by a display string */
+      ch = 0xFFFC;     /* Unicode Object Replacement Character */
+    }
+#endif
+  else
+    {
+      ch = FETCH_MULTIBYTE_CHAR (bytepos);
+      *ch_len = CHAR_BYTES (ch);
+      *nchars = 1;
+    }
+
+  /* If we just entered a run of characters covered by a display
+     string, compute the position of the next display string.  */
+  if (bytepos + *ch_len <= ZV_BYTE && bytepos + *ch_len > *disp_pos)
+    *disp_pos = compute_display_string_pos (bytepos + *ch_len);
+
+  return ch;
+}
+
+/* Looks like we won't need this one.  */
+#if 0
+/* Fetch character at CHARPOS/BYTEPOS.  Return the character, and
+   advance CHARPOS and BYTEPOS to the next character in logical
+   order.  */
+static INLINE int
+bidi_fetch_char_advance (EMACS_INT *charpos, EMACS_INT *bytepos)
+{
+  int ch;
+
+  /* FIXME: Support strings in addition to buffers.  */
+  FETCH_CHAR_ADVANCE_NO_CHECK (ch, charpos, bytepos);
+
+#if 0
+  if (...)
+    {
+      /* FIXME: Support characters covered by display strings.  */
+      ch = 0xFFFC;
+    }
+#endif
+
+  return ch;
+}
+#endif
+
  /* Find the beginning of this paragraph by looking back in the buffer.
     Value is the byte position of the paragraph's beginning.  */
  static EMACS_INT
@@ -576,6 +651,10 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
    while (pos_byte > BEGV_BYTE
          && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
      {
+      /* FIXME: What if the paragraph beginning is covered by a
+        display string?  And what if a display string covering some
+        of the text over which we scan back includes
+        paragraph_start_re?  */
        pos = find_next_newline_no_quit (pos - 1, -1);
        pos_byte = CHAR_TO_BYTE (pos);
      }
@@ -587,7 +666,7 @@ bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
     R2L, just use that.  Otherwise, determine the paragraph direction
     from the first strong directional character of the paragraph.
  
-   NO_DEFAULT_P non-nil means don't default to L2R if the paragraph
+   NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
     has no strong directional characters and both DIR and
     bidi_it->paragraph_dir are NEUTRAL_DIR.  In that case, search back
     in the buffer until a paragraph is found with a strong character,
@@ -622,8 +701,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
      }
    else if (dir == NEUTRAL_DIR) /* P2 */
      {
-      int ch, ch_len;
-      EMACS_INT pos;
+      int ch;
+      EMACS_INT ch_len, nchars;
+      EMACS_INT pos, disp_pos = -1;
        bidi_type_t type;
  
        if (!bidi_initialized)
@@ -658,12 +738,11 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
          is non-zero.  */
        do {
         bytepos = pstartbyte;
-       ch = FETCH_CHAR (bytepos);
-       ch_len = CHAR_BYTES (ch);
+       ch = bidi_fetch_char (bytepos, &ch_len, &nchars, &disp_pos);
         pos = BYTE_TO_CHAR (bytepos);
         type = bidi_get_type (ch, NEUTRAL_DIR);
  
-       for (pos++, bytepos += ch_len;
+       for (pos += nchars, bytepos += ch_len;
              /* NOTE: UAX#9 says to search only for L, AL, or R types
                 of characters, and ignore RLE, RLO, LRE, and LRO.
                 However, I'm not sure it makes sense to omit those 4;
@@ -683,7 +762,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
                 type = NEUTRAL_B;
                 break;
               }
-           FETCH_CHAR_ADVANCE (ch, pos, bytepos);
+           ch = bidi_fetch_char (bytepos, &ch_len, &nchars, &disp_pos);
+           pos += nchars;
+           bytepos += ch_len;
           }
         if (type == STRONG_R || type == STRONG_AL) /* P3 */
           bidi_it->paragraph_dir = R2L;
@@ -702,6 +783,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
                 /* Find the beginning of the previous paragraph, if any.  */
                 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
                   {
+                   /* FXIME: What if p is covered by a display
+                      string?  See also a FIXME inside
+                      bidi_find_paragraph_start.  */
                     p--;
                     pbyte = CHAR_TO_BYTE (p);
                     prevpbyte = bidi_find_paragraph_start (p, pbyte);
@@ -738,7 +822,7 @@ bidi_set_paragraph_end (struct bidi_it *bidi_it)
    bidi_it->resolved_level = bidi_it->level_stack[0].level;
  }
  
-/* Initialize the bidi iterator from buffer position CHARPOS.  */
+/* Initialize the bidi iterator from buffer/string position CHARPOS.  */
  void
  bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
  {
@@ -746,6 +830,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
      bidi_initialize ();
    bidi_it->charpos = charpos;
    bidi_it->bytepos = bytepos;
+  bidi_it->nchars = -1;        /* to be computed in bidi_resolve_explicit_1 */
    bidi_it->first_elt = 1;
    bidi_set_paragraph_end (bidi_it);
    bidi_it->new_paragraph = 1;
@@ -767,6 +852,7 @@ bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
      bidi_it->prev_for_neutral.type_after_w1 =
      bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
    bidi_it->sor = L2R;   /* FIXME: should it be user-selectable? */
+  bidi_it->disp_pos = -1;      /* invalid/unknown */
    bidi_cache_shrink ();
  }
  
@@ -829,12 +915,16 @@ bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
  }
  
  static INLINE int
-bidi_explicit_dir_char (int c)
+bidi_explicit_dir_char (int ch)
  {
-  /* FIXME: this should be replaced with a lookup table with suitable
-     bits set, like standard C ctype macros do.  */
-  return (c == LRE_CHAR || c == LRO_CHAR
-         || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
+  bidi_type_t ch_type;
+
+  if (!bidi_initialized)
+    abort ();
+  ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+  return (ch_type == LRE || ch_type == LRO
+         || ch_type == RLE || ch_type == RLO
+         || ch_type == PDF);
  }
  
  /* A helper function for bidi_resolve_explicit.  It advances to the
@@ -850,7 +940,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
    int new_level;
    bidi_dir_t override;
  
-  if (bidi_it->bytepos < BEGV_BYTE     /* after reseat to BEGV? */
+  /* If reseat()'ed, don't advance, so as to start iteration from the
+     position where we were reseated.  bidi_it->bytepos can be less
+     than BEGV_BYTE after reseat to BEGV.  */
+  if (bidi_it->bytepos < BEGV_BYTE
        || bidi_it->first_elt)
      {
        bidi_it->first_elt = 0;
@@ -860,7 +953,9 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
      }
    else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
      {
-      bidi_it->charpos++;
+      /* Advance to the next character, skipping characters covered by
+        display strings (nchars > 1).  */
+      bidi_it->charpos += bidi_it->nchars;
        if (bidi_it->ch_len == 0)
         abort ();
        bidi_it->bytepos += bidi_it->ch_len;
@@ -870,17 +965,20 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
    override = bidi_it->level_stack[bidi_it->stack_idx].override;
    new_level = current_level;
  
-  /* in case it is a unibyte character (not yet implemented) */
-  /* _fetch_multibyte_char_len = 1; */
    if (bidi_it->bytepos >= ZV_BYTE)
      {
        curchar = BIDI_EOB;
        bidi_it->ch_len = 1;
+      bidi_it->nchars = 1;
+      bidi_it->disp_pos = ZV_BYTE;
      }
    else
      {
-      curchar = FETCH_CHAR (bidi_it->bytepos);
-      bidi_it->ch_len = CHAR_BYTES (curchar);
+      /* Fetch the character at BYTEPOS.  If it is covered by a
+        display string, treat the entire run of covered characters as
+        a single character u+FFFC.  */
+      curchar = bidi_fetch_char (bidi_it->bytepos, &bidi_it->ch_len,
+                                &bidi_it->nchars, &bidi_it->disp_pos);
      }
    bidi_it->ch = curchar;
  
@@ -1006,10 +1104,10 @@ bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
  }
  
  /* Given an iterator state in BIDI_IT, advance one character position
-   in the buffer to the next character (in the logical order), resolve
-   any explicit embeddings and directional overrides, and return the
-   embedding level of the character after resolving explicit
-   directives and ignoring empty embeddings.  */
+   in the buffer/string to the next character (in the logical order),
+   resolve any explicit embeddings and directional overrides, and
+   return the embedding level of the character after resolving
+   explicit directives and ignoring empty embeddings.  */
  static int
  bidi_resolve_explicit (struct bidi_it *bidi_it)
  {
@@ -1020,8 +1118,8 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
        && bidi_it->type == WEAK_BN
        && bidi_it->ignore_bn_limit == 0 /* only if not already known */
        && bidi_it->bytepos < ZV_BYTE    /* not already at EOB */
-      && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
-                                            + bidi_it->ch_len)))
+      && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+                                                      + bidi_it->ch_len)))
      {
        /* Avoid pushing and popping embedding levels if the level run
          is empty, as this breaks level runs where it shouldn't.
@@ -1033,14 +1131,16 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
  
        bidi_copy_it (&saved_it, bidi_it);
  
-      while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
-                                                + bidi_it->ch_len)))
+      while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+                                                          + bidi_it->ch_len)))
         {
+         /* This advances to the next character, skipping any
+            characters covered by display strings.  */
           level = bidi_resolve_explicit_1 (bidi_it);
         }
  
        if (level == prev_level) /* empty embedding */
-       saved_it.ignore_bn_limit = bidi_it->charpos + 1;
+       saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
        else                     /* this embedding is non-empty */
         saved_it.ignore_bn_limit = -1;
  
@@ -1076,8 +1176,8 @@ bidi_resolve_explicit (struct bidi_it *bidi_it)
    return new_level;
  }
  
-/* Advance in the buffer, resolve weak types and return the type of
-   the next character after weak type resolution.  */
+/* Advance in the buffer/string, resolve weak types and return the
+   type of the next character after weak type resolution.  */
  static bidi_type_t
  bidi_resolve_weak (struct bidi_it *bidi_it)
  {
@@ -1156,7 +1256,8 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
         {
           next_char =
             bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
-           ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+           ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+                                              + bidi_it->ch_len);
           type_of_next = bidi_get_type (next_char, override);
  
           if (type_of_next == WEAK_BN
@@ -1204,11 +1305,12 @@ bidi_resolve_weak (struct bidi_it *bidi_it)
             type = WEAK_EN;
           else                  /* W5: ET/BN with EN after it.  */
             {
-             EMACS_INT en_pos = bidi_it->charpos + 1;
+             EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
  
               next_char =
                 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
-               ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+               ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
+                                                  + bidi_it->ch_len);
               type_of_next = bidi_get_type (next_char, override);
  
               if (type_of_next == WEAK_ET
@@ -1299,8 +1401,8 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
           /* Arrrgh!!  The UAX#9 algorithm is too deeply entrenched in
              the assumption of batch-style processing; see clauses W4,
              W5, and especially N1, which require to look far forward
-            (as well as back) in the buffer.  May the fleas of a
-            thousand camels infest the armpits of those who design
+            (as well as back) in the buffer/string.  May the fleas of
+            a thousand camels infest the armpits of those who design
              supposedly general-purpose algorithms by looking at their
              own implementations, and fail to consider other possible
              implementations!  */
@@ -1391,8 +1493,9 @@ bidi_resolve_neutral (struct bidi_it *bidi_it)
  }
  
  /* Given an iterator state in BIDI_IT, advance one character position
-   in the buffer to the next character (in the logical order), resolve
-   the bidi type of that next character, and return that type.  */
+   in the buffer/string to the next character (in the logical order),
+   resolve the bidi type of that next character, and return that
+   type.  */
  static bidi_type_t
  bidi_type_of_next_char (struct bidi_it *bidi_it)
  {
@@ -1416,15 +1519,16 @@ bidi_type_of_next_char (struct bidi_it *bidi_it)
  }
  
  /* Given an iterator state BIDI_IT, advance one character position in
-   the buffer to the next character (in the logical order), resolve
-   the embedding and implicit levels of that next character, and
-   return the resulting level.  */
+   the buffer/string to the next character (in the current scan
+   direction), resolve the embedding and implicit levels of that next
+   character, and return the resulting level.  */
  static int
  bidi_level_of_next_char (struct bidi_it *bidi_it)
  {
    bidi_type_t type;
    int level, prev_level = -1;
    struct bidi_saved_info next_for_neutral;
+  EMACS_INT next_char_pos;
  
    if (bidi_it->scan_dir == 1)
      {
@@ -1466,8 +1570,14 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
      }
    next_for_neutral = bidi_it->next_for_neutral;
  
-  /* Perhaps it is already cached.  */
-  type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
+  /* Perhaps the character we want is already cached.  If it is, the
+     call to bidi_cache_find below will return a type other than
+     UNKNOWN_BT.  */
+  if (bidi_it->scan_dir > 0)
+    next_char_pos = bidi_it->charpos + bidi_it->nchars;
+  else
+    next_char_pos = bidi_it->charpos - 1;
+  type = bidi_cache_find (next_char_pos, -1, bidi_it);
    if (type != UNKNOWN_BT)
      {
        /* Don't lose the information for resolving neutrals!  The
@@ -1529,14 +1639,13 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
        int clen = bidi_it->ch_len;
        EMACS_INT bpos = bidi_it->bytepos;
        EMACS_INT cpos = bidi_it->charpos;
+      EMACS_INT disp_pos = bidi_it->disp_pos;
+      EMACS_INT nc;
        bidi_type_t chtype;
  
        do {
-       /*_fetch_multibyte_char_len = 1;*/
-       ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
-       bpos += clen;
-       cpos++;
-       clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
+       ch = bidi_fetch_char (bpos += clen, &clen, &nc, &disp_pos);
+       cpos += nc;
         if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
           chtype = NEUTRAL_B;
         else
@@ -1615,8 +1724,8 @@ bidi_level_of_next_char (struct bidi_it *bidi_it)
  
     If this level's other edge is cached, we simply jump to it, filling
     the iterator structure with the iterator state on the other edge.
-   Otherwise, we walk the buffer until we come back to the same level
-   as LEVEL.
+   Otherwise, we walk the buffer or string until we come back to the
+   same level as LEVEL.
  
     Note: we are not talking here about a ``level run'' in the UAX#9
     sense of the term, but rather about a ``level'' which includes
@@ -1680,6 +1789,7 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
           sentinel.bytepos--;
           sentinel.ch = '\n';   /* doesn't matter, but why not? */
           sentinel.ch_len = 1;
+         sentinel.nchars = 1;
         }
        bidi_cache_iterator_state (&sentinel, 1);
      }
@@ -1750,14 +1860,15 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
        && bidi_it->bytepos < ZV_BYTE)
      {
        EMACS_INT sep_len =
-       bidi_at_paragraph_end (bidi_it->charpos + 1,
+       bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
                                bidi_it->bytepos + bidi_it->ch_len);
        if (sep_len >= 0)
         {
           bidi_it->new_paragraph = 1;
           /* Record the buffer position of the last character of the
              paragraph separator.  */
-         bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
+         bidi_it->separator_limit =
+           bidi_it->charpos + bidi_it->nchars + sep_len;
         }
      }
  
@@ -1767,7 +1878,8 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
          last cached position, the cache's job is done and we can
          discard it.  */
        if (bidi_it->resolved_level == bidi_it->level_stack[0].level
-         && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
+         && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
+                                + bidi_cache[bidi_cache_idx - 1].nchars - 1))
         bidi_cache_reset ();
         /* But as long as we are caching during forward scan, we must
            cache each state, or else the cache integrity will be
diff --git a/src/dispextern.h b/src/dispextern.h

index 72e23e6..f947230 100644 (file)
--- a/src/dispextern.h
+++ b/src/dispextern.h
@@ -1812,12 +1812,16 @@ struct bidi_stack {
    bidi_dir_t override;
  };
  
-/* Data type for iterating over bidi text.  */
+/* Data type for reordering bidirectional text.  */
  struct bidi_it {
    EMACS_INT bytepos;           /* iterator's position in buffer */
    EMACS_INT charpos;
-  int ch;                      /* character itself */
-  int ch_len;                  /* length of its multibyte sequence */
+  int ch;                      /* character at that position, or u+FFFC
+                                  ("object replacement character") for a run
+                                  of characters covered by a display string */
+  EMACS_INT nchars;            /* its "length", usually 1; it's > 1 for a run
+                                  of characters covered by a display string */
+  EMACS_INT ch_len;            /* its length in bytes */
    bidi_type_t type;            /* bidi type of this character, after
                                    resolving weak and neutral types */
    bidi_type_t type_after_w1;   /* original type, after overrides and W1 */
@@ -1844,6 +1848,7 @@ struct bidi_it {
    bidi_dir_t paragraph_dir;    /* current paragraph direction */
    int new_paragraph;           /* if non-zero, we expect a new paragraph */
    EMACS_INT separator_limit;   /* where paragraph separator should end */
+  EMACS_INT disp_pos;          /* byte position of display string after ch */
  };
  
  /* Value is non-zero when the bidi iterator is at base paragraph
@@ -3001,6 +3006,7 @@ extern void reseat_at_previous_visible_line_start (struct it *);
  extern Lisp_Object lookup_glyphless_char_display (int, struct it *);
  extern int calc_pixel_width_or_height (double *, struct it *, Lisp_Object,
                                         struct font *, int, int *);
+extern EMACS_INT compute_display_string_pos (EMACS_INT);
  
  #ifdef HAVE_WINDOW_SYSTEM
  
diff --git a/src/xdisp.c b/src/xdisp.c

index 88353e3..10f69b4 100644 (file)
--- a/src/xdisp.c
+++ b/src/xdisp.c
@@ -3085,6 +3085,20 @@ next_overlay_change (EMACS_INT pos)
    return endpos;
  }
  
+/* Return the byte position of a display string at or after BYTEPOS.
+   If no display string exist at or after BYTEPOS, return ZV_BYTE.  A
+   display string is either an overlay with `display' property whose
+   value is a string or a `display' text property whose value is a
+   string.  */
+EMACS_INT
+compute_display_string_pos (EMACS_INT bytepos)
+{
+  if (bytepos >= ZV_BYTE)
+    return ZV_BYTE;
+  /* FIXME! */
+  return ZV_BYTE;
+}
+
  
  \f
  /***********************************************************************
@@ -5382,6 +5396,7 @@ reseat_1 (struct it *it, struct text_pos pos, int set_stop_p)
      {
        it->bidi_it.first_elt = 1;
        it->bidi_it.paragraph_dir = NEUTRAL_DIR;
+      it->bidi_it.disp_pos = -1;
      }
  
    if (set_stop_p)
author	Eli Zaretskii <eliz@gnu.org>
	Tue, 10 May 2011 16:12:16 +0000 (19:12 +0300)
committer	Eli Zaretskii <eliz@gnu.org>
	Tue, 10 May 2011 16:12:16 +0000 (19:12 +0300)
src/ChangeLog		patch \| blob \| blame \| history
src/bidi.c		patch \| blob \| blame \| history
src/dispextern.h		patch \| blob \| blame \| history
src/xdisp.c		patch \| blob \| blame \| history