1 /* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 (UBA) as per UAX#9, a part of the Unicode Standard.
25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or
29 The main entry point is bidi_move_to_visually_next. Each time it
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly.
38 Two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
43 A few auxiliary entry points are used to initialize the bidi
44 iterator for iterating an object (buffer or string), push and pop
45 the bidi iterator state, and save and restore the state of the bidi
48 If you want to understand the code, you will have to read it
49 together with the relevant portions of UAX#9. The comments include
50 references to UAX#9 rules, for that very reason.
52 A note about references to UAX#9 rules: if the reference says
53 something like "X9/Retaining", it means that you need to refer to
54 rule X9 and to its modifications described in the "Implementation
55 Notes" section of UAX#9, under "Retaining Format Codes". */
63 #include "character.h"
64 #include "dispextern.h"
66 static int bidi_initialized
= 0;
68 static Lisp_Object bidi_type_table
, bidi_mirror_table
;
70 #define LRM_CHAR 0x200E
71 #define RLM_CHAR 0x200F
74 /* Data type for describing the bidirectional character categories. */
82 /* UAX#9 says to search only for L, AL, or R types of characters, and
83 ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
84 level. Yudit indeed ignores them. This variable is therefore set
85 by default to ignore them, but setting it to zero will take them
87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE
;
88 int bidi_ignore_explicit_marks_for_paragraph_level
= 1;
90 static Lisp_Object paragraph_start_re
, paragraph_separate_re
;
91 static Lisp_Object Qparagraph_start
, Qparagraph_separate
;
94 /***********************************************************************
96 ***********************************************************************/
98 /* Return the bidi type of a character CH, subject to the current
99 directional OVERRIDE. */
100 static inline bidi_type_t
101 bidi_get_type (int ch
, bidi_dir_t override
)
103 bidi_type_t default_type
;
107 if (ch
< 0 || ch
> MAX_CHAR
)
110 default_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
112 if (override
== NEUTRAL_DIR
)
115 switch (default_type
)
117 /* Although UAX#9 does not tell, it doesn't make sense to
118 override NEUTRAL_B and LRM/RLM characters. */
133 if (override
== L2R
) /* X6 */
135 else if (override
== R2L
)
138 abort (); /* can't happen: handled above */
144 bidi_check_type (bidi_type_t type
)
146 if (type
< UNKNOWN_BT
|| type
> NEUTRAL_ON
)
150 /* Given a bidi TYPE of a character, return its category. */
151 static inline bidi_category_t
152 bidi_get_category (bidi_type_t type
)
166 case PDF
: /* ??? really?? */
185 /* Return the mirrored character of C, if it has one. If C has no
186 mirrored counterpart, return C.
187 Note: The conditions in UAX#9 clause L4 regarding the surrounding
188 context must be tested by the caller. */
190 bidi_mirror_char (int c
)
196 if (c
< 0 || c
> MAX_CHAR
)
199 val
= CHAR_TABLE_REF (bidi_mirror_table
, c
);
204 if (v
< 0 || v
> MAX_CHAR
)
213 /* Determine the start-of-run (sor) directional type given the two
214 embedding levels on either side of the run boundary. Also, update
215 the saved info about previously seen characters, since that info is
216 generally valid for a single level run. */
218 bidi_set_sor_type (struct bidi_it
*bidi_it
, int level_before
, int level_after
)
220 int higher_level
= level_before
> level_after
? level_before
: level_after
;
222 /* The prev_was_pdf gork is required for when we have several PDFs
223 in a row. In that case, we want to compute the sor type for the
224 next level run only once: when we see the first PDF. That's
225 because the sor type depends only on the higher of the two levels
226 that we find on the two sides of the level boundary (see UAX#9,
227 clause X10), and so we don't need to know the final embedding
228 level to which we descend after processing all the PDFs. */
229 if (!bidi_it
->prev_was_pdf
|| level_before
< level_after
)
230 /* FIXME: should the default sor direction be user selectable? */
231 bidi_it
->sor
= (higher_level
& 1) != 0 ? R2L
: L2R
;
232 if (level_before
> level_after
)
233 bidi_it
->prev_was_pdf
= 1;
235 bidi_it
->prev
.type
= UNKNOWN_BT
;
236 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
237 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
238 bidi_it
->prev_for_neutral
.type
= bidi_it
->sor
== R2L
? STRONG_R
: STRONG_L
;
239 bidi_it
->prev_for_neutral
.charpos
= bidi_it
->charpos
;
240 bidi_it
->prev_for_neutral
.bytepos
= bidi_it
->bytepos
;
241 bidi_it
->next_for_neutral
.type
= bidi_it
->next_for_neutral
.type_after_w1
=
242 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
243 bidi_it
->ignore_bn_limit
= -1; /* meaning it's unknown */
246 /* Push the current embedding level and override status; reset the
247 current level to LEVEL and the current override status to OVERRIDE. */
249 bidi_push_embedding_level (struct bidi_it
*bidi_it
,
250 int level
, bidi_dir_t override
)
252 bidi_it
->stack_idx
++;
253 xassert (bidi_it
->stack_idx
< BIDI_MAXLEVEL
);
254 bidi_it
->level_stack
[bidi_it
->stack_idx
].level
= level
;
255 bidi_it
->level_stack
[bidi_it
->stack_idx
].override
= override
;
258 /* Pop the embedding level and directional override status from the
259 stack, and return the new level. */
261 bidi_pop_embedding_level (struct bidi_it
*bidi_it
)
263 /* UAX#9 says to ignore invalid PDFs. */
264 if (bidi_it
->stack_idx
> 0)
265 bidi_it
->stack_idx
--;
266 return bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
269 /* Record in SAVED_INFO the information about the current character. */
271 bidi_remember_char (struct bidi_saved_info
*saved_info
,
272 struct bidi_it
*bidi_it
)
274 saved_info
->charpos
= bidi_it
->charpos
;
275 saved_info
->bytepos
= bidi_it
->bytepos
;
276 saved_info
->type
= bidi_it
->type
;
277 bidi_check_type (bidi_it
->type
);
278 saved_info
->type_after_w1
= bidi_it
->type_after_w1
;
279 bidi_check_type (bidi_it
->type_after_w1
);
280 saved_info
->orig_type
= bidi_it
->orig_type
;
281 bidi_check_type (bidi_it
->orig_type
);
284 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
285 copies the part of the level stack that is actually in use. */
287 bidi_copy_it (struct bidi_it
*to
, struct bidi_it
*from
)
291 /* Copy everything except the level stack and beyond. */
292 memcpy (to
, from
, offsetof (struct bidi_it
, level_stack
[0]));
294 /* Copy the active part of the level stack. */
295 to
->level_stack
[0] = from
->level_stack
[0]; /* level zero is always in use */
296 for (i
= 1; i
<= from
->stack_idx
; i
++)
297 to
->level_stack
[i
] = from
->level_stack
[i
];
301 /***********************************************************************
302 Caching the bidi iterator states
303 ***********************************************************************/
305 #define BIDI_CACHE_CHUNK 200
306 static struct bidi_it
*bidi_cache
;
307 static ptrdiff_t bidi_cache_size
= 0;
308 enum { elsz
= sizeof (struct bidi_it
) };
309 static ptrdiff_t bidi_cache_idx
; /* next unused cache slot */
310 static ptrdiff_t bidi_cache_last_idx
; /* slot of last cache hit */
311 static ptrdiff_t bidi_cache_start
= 0; /* start of cache for this
314 /* 5-slot stack for saving the start of the previous level of the
315 cache. xdisp.c maintains a 5-slot stack for its iterator state,
316 and we need the same size of our stack. */
317 static ptrdiff_t bidi_cache_start_stack
[IT_STACK_SIZE
];
318 static int bidi_cache_sp
;
320 /* Size of header used by bidi_shelve_cache. */
323 bidi_shelve_header_size
=
324 (sizeof (bidi_cache_idx
) + sizeof (bidi_cache_start_stack
)
325 + sizeof (bidi_cache_sp
) + sizeof (bidi_cache_start
)
326 + sizeof (bidi_cache_last_idx
))
329 /* Reset the cache state to the empty state. We only reset the part
330 of the cache relevant to iteration of the current object. Previous
331 objects, which are pushed on the display iterator's stack, are left
332 intact. This is called when the cached information is no longer
333 useful for the current iteration, e.g. when we were reseated to a
334 new position on the same object. */
336 bidi_cache_reset (void)
338 bidi_cache_idx
= bidi_cache_start
;
339 bidi_cache_last_idx
= -1;
342 /* Shrink the cache to its minimal size. Called when we init the bidi
343 iterator for reordering a buffer or a string that does not come
344 from display properties, because that means all the previously
345 cached info is of no further use. */
347 bidi_cache_shrink (void)
349 if (bidi_cache_size
> BIDI_CACHE_CHUNK
)
351 bidi_cache_size
= BIDI_CACHE_CHUNK
;
353 (struct bidi_it
*) xrealloc (bidi_cache
, bidi_cache_size
* elsz
);
359 bidi_cache_fetch_state (ptrdiff_t idx
, struct bidi_it
*bidi_it
)
361 int current_scan_dir
= bidi_it
->scan_dir
;
363 if (idx
< bidi_cache_start
|| idx
>= bidi_cache_idx
)
366 bidi_copy_it (bidi_it
, &bidi_cache
[idx
]);
367 bidi_it
->scan_dir
= current_scan_dir
;
368 bidi_cache_last_idx
= idx
;
371 /* Find a cached state with a given CHARPOS and resolved embedding
372 level less than or equal to LEVEL. If LEVEL is -1, disregard the
373 resolved levels in cached states. DIR, if non-zero, means search
374 in that direction from the last cache hit. */
375 static inline ptrdiff_t
376 bidi_cache_search (EMACS_INT charpos
, int level
, int dir
)
378 ptrdiff_t i
, i_start
;
380 if (bidi_cache_idx
> bidi_cache_start
)
382 if (bidi_cache_last_idx
== -1)
383 bidi_cache_last_idx
= bidi_cache_idx
- 1;
384 if (charpos
< bidi_cache
[bidi_cache_last_idx
].charpos
)
387 i_start
= bidi_cache_last_idx
- 1;
389 else if (charpos
> (bidi_cache
[bidi_cache_last_idx
].charpos
390 + bidi_cache
[bidi_cache_last_idx
].nchars
- 1))
393 i_start
= bidi_cache_last_idx
+ 1;
396 i_start
= bidi_cache_last_idx
;
400 i_start
= bidi_cache_idx
- 1;
405 /* Linear search for now; FIXME! */
406 for (i
= i_start
; i
>= bidi_cache_start
; i
--)
407 if (bidi_cache
[i
].charpos
<= charpos
408 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
409 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
414 for (i
= i_start
; i
< bidi_cache_idx
; i
++)
415 if (bidi_cache
[i
].charpos
<= charpos
416 && charpos
< bidi_cache
[i
].charpos
+ bidi_cache
[i
].nchars
417 && (level
== -1 || bidi_cache
[i
].resolved_level
<= level
))
425 /* Find a cached state where the resolved level changes to a value
426 that is lower than LEVEL, and return its cache slot index. DIR is
427 the direction to search, starting with the last used cache slot.
428 If DIR is zero, we search backwards from the last occupied cache
429 slot. BEFORE, if non-zero, means return the index of the slot that
430 is ``before'' the level change in the search direction. That is,
431 given the cached levels like this:
436 and assuming we are at the position cached at the slot marked with
437 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
438 index of slot B or A, depending on whether BEFORE is, respectively,
441 bidi_cache_find_level_change (int level
, int dir
, int before
)
445 ptrdiff_t i
= dir
? bidi_cache_last_idx
: bidi_cache_idx
- 1;
446 int incr
= before
? 1 : 0;
448 xassert (!dir
|| bidi_cache_last_idx
>= 0);
457 while (i
>= bidi_cache_start
+ incr
)
459 if (bidi_cache
[i
- incr
].resolved_level
>= 0
460 && bidi_cache
[i
- incr
].resolved_level
< level
)
467 while (i
< bidi_cache_idx
- incr
)
469 if (bidi_cache
[i
+ incr
].resolved_level
>= 0
470 && bidi_cache
[i
+ incr
].resolved_level
< level
)
481 bidi_cache_ensure_space (ptrdiff_t idx
)
483 /* Enlarge the cache as needed. */
484 if (idx
>= bidi_cache_size
)
486 /* The bidi cache cannot be larger than the largest Lisp string
488 ptrdiff_t string_or_buffer_bound
=
489 max (BUF_BYTES_MAX
, STRING_BYTES_BOUND
);
491 /* Also, it cannot be larger than what C can represent. */
493 (min (PTRDIFF_MAX
, SIZE_MAX
) - bidi_shelve_header_size
) / elsz
;
496 xpalloc (bidi_cache
, &bidi_cache_size
,
497 max (BIDI_CACHE_CHUNK
, idx
- bidi_cache_size
+ 1),
498 min (string_or_buffer_bound
, c_bound
), elsz
);
503 bidi_cache_iterator_state (struct bidi_it
*bidi_it
, int resolved
)
507 /* We should never cache on backward scans. */
508 if (bidi_it
->scan_dir
== -1)
510 idx
= bidi_cache_search (bidi_it
->charpos
, -1, 1);
514 idx
= bidi_cache_idx
;
515 bidi_cache_ensure_space (idx
);
516 /* Character positions should correspond to cache positions 1:1.
517 If we are outside the range of cached positions, the cache is
518 useless and must be reset. */
519 if (idx
> bidi_cache_start
&&
520 (bidi_it
->charpos
> (bidi_cache
[idx
- 1].charpos
521 + bidi_cache
[idx
- 1].nchars
)
522 || bidi_it
->charpos
< bidi_cache
[bidi_cache_start
].charpos
))
525 idx
= bidi_cache_start
;
527 if (bidi_it
->nchars
<= 0)
529 bidi_copy_it (&bidi_cache
[idx
], bidi_it
);
531 bidi_cache
[idx
].resolved_level
= -1;
535 /* Copy only the members which could have changed, to avoid
536 costly copying of the entire struct. */
537 bidi_cache
[idx
].type
= bidi_it
->type
;
538 bidi_check_type (bidi_it
->type
);
539 bidi_cache
[idx
].type_after_w1
= bidi_it
->type_after_w1
;
540 bidi_check_type (bidi_it
->type_after_w1
);
542 bidi_cache
[idx
].resolved_level
= bidi_it
->resolved_level
;
544 bidi_cache
[idx
].resolved_level
= -1;
545 bidi_cache
[idx
].invalid_levels
= bidi_it
->invalid_levels
;
546 bidi_cache
[idx
].invalid_rl_levels
= bidi_it
->invalid_rl_levels
;
547 bidi_cache
[idx
].next_for_neutral
= bidi_it
->next_for_neutral
;
548 bidi_cache
[idx
].next_for_ws
= bidi_it
->next_for_ws
;
549 bidi_cache
[idx
].ignore_bn_limit
= bidi_it
->ignore_bn_limit
;
552 bidi_cache_last_idx
= idx
;
553 if (idx
>= bidi_cache_idx
)
554 bidi_cache_idx
= idx
+ 1;
557 static inline bidi_type_t
558 bidi_cache_find (EMACS_INT charpos
, int level
, struct bidi_it
*bidi_it
)
560 ptrdiff_t i
= bidi_cache_search (charpos
, level
, bidi_it
->scan_dir
);
562 if (i
>= bidi_cache_start
)
564 bidi_dir_t current_scan_dir
= bidi_it
->scan_dir
;
566 bidi_copy_it (bidi_it
, &bidi_cache
[i
]);
567 bidi_cache_last_idx
= i
;
568 /* Don't let scan direction from the cached state override
569 the current scan direction. */
570 bidi_it
->scan_dir
= current_scan_dir
;
571 return bidi_it
->type
;
578 bidi_peek_at_next_level (struct bidi_it
*bidi_it
)
580 if (bidi_cache_idx
== bidi_cache_start
|| bidi_cache_last_idx
== -1)
582 return bidi_cache
[bidi_cache_last_idx
+ bidi_it
->scan_dir
].resolved_level
;
586 /***********************************************************************
587 Pushing and popping the bidi iterator state
588 ***********************************************************************/
590 /* Push the bidi iterator state in preparation for reordering a
591 different object, e.g. display string found at certain buffer
592 position. Pushing the bidi iterator boils down to saving its
593 entire state on the cache and starting a new cache "stacked" on top
594 of the current cache. */
596 bidi_push_it (struct bidi_it
*bidi_it
)
598 /* Save the current iterator state in its entirety after the last
600 bidi_cache_ensure_space (bidi_cache_idx
);
601 memcpy (&bidi_cache
[bidi_cache_idx
++], bidi_it
, sizeof (struct bidi_it
));
603 /* Push the current cache start onto the stack. */
604 xassert (bidi_cache_sp
< IT_STACK_SIZE
);
605 bidi_cache_start_stack
[bidi_cache_sp
++] = bidi_cache_start
;
607 /* Start a new level of cache, and make it empty. */
608 bidi_cache_start
= bidi_cache_idx
;
609 bidi_cache_last_idx
= -1;
612 /* Restore the iterator state saved by bidi_push_it and return the
613 cache to the corresponding state. */
615 bidi_pop_it (struct bidi_it
*bidi_it
)
617 if (bidi_cache_start
<= 0)
620 /* Reset the next free cache slot index to what it was before the
621 call to bidi_push_it. */
622 bidi_cache_idx
= bidi_cache_start
- 1;
624 /* Restore the bidi iterator state saved in the cache. */
625 memcpy (bidi_it
, &bidi_cache
[bidi_cache_idx
], sizeof (struct bidi_it
));
627 /* Pop the previous cache start from the stack. */
628 if (bidi_cache_sp
<= 0)
630 bidi_cache_start
= bidi_cache_start_stack
[--bidi_cache_sp
];
632 /* Invalidate the last-used cache slot data. */
633 bidi_cache_last_idx
= -1;
636 static ptrdiff_t bidi_cache_total_alloc
;
638 /* Stash away a copy of the cache and its control variables. */
640 bidi_shelve_cache (void)
642 unsigned char *databuf
;
646 if (bidi_cache_idx
== 0)
649 alloc
= (bidi_shelve_header_size
650 + bidi_cache_idx
* sizeof (struct bidi_it
));
651 databuf
= xmalloc (alloc
);
652 bidi_cache_total_alloc
+= alloc
;
654 memcpy (databuf
, &bidi_cache_idx
, sizeof (bidi_cache_idx
));
655 memcpy (databuf
+ sizeof (bidi_cache_idx
),
656 bidi_cache
, bidi_cache_idx
* sizeof (struct bidi_it
));
657 memcpy (databuf
+ sizeof (bidi_cache_idx
)
658 + bidi_cache_idx
* sizeof (struct bidi_it
),
659 bidi_cache_start_stack
, sizeof (bidi_cache_start_stack
));
660 memcpy (databuf
+ sizeof (bidi_cache_idx
)
661 + bidi_cache_idx
* sizeof (struct bidi_it
)
662 + sizeof (bidi_cache_start_stack
),
663 &bidi_cache_sp
, sizeof (bidi_cache_sp
));
664 memcpy (databuf
+ sizeof (bidi_cache_idx
)
665 + bidi_cache_idx
* sizeof (struct bidi_it
)
666 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
),
667 &bidi_cache_start
, sizeof (bidi_cache_start
));
668 memcpy (databuf
+ sizeof (bidi_cache_idx
)
669 + bidi_cache_idx
* sizeof (struct bidi_it
)
670 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
)
671 + sizeof (bidi_cache_start
),
672 &bidi_cache_last_idx
, sizeof (bidi_cache_last_idx
));
677 /* Restore the cache state from a copy stashed away by
678 bidi_shelve_cache, and free the buffer used to stash that copy.
679 JUST_FREE non-zero means free the buffer, but don't restore the
680 cache; used when the corresponding iterator is discarded instead of
683 bidi_unshelve_cache (void *databuf
, int just_free
)
685 unsigned char *p
= databuf
;
691 /* A NULL pointer means an empty cache. */
692 bidi_cache_start
= 0;
703 memcpy (&idx
, p
, sizeof (bidi_cache_idx
));
704 bidi_cache_total_alloc
-=
705 bidi_shelve_header_size
+ idx
* sizeof (struct bidi_it
);
709 memcpy (&bidi_cache_idx
, p
, sizeof (bidi_cache_idx
));
710 bidi_cache_ensure_space (bidi_cache_idx
);
711 memcpy (bidi_cache
, p
+ sizeof (bidi_cache_idx
),
712 bidi_cache_idx
* sizeof (struct bidi_it
));
713 memcpy (bidi_cache_start_stack
,
714 p
+ sizeof (bidi_cache_idx
)
715 + bidi_cache_idx
* sizeof (struct bidi_it
),
716 sizeof (bidi_cache_start_stack
));
717 memcpy (&bidi_cache_sp
,
718 p
+ sizeof (bidi_cache_idx
)
719 + bidi_cache_idx
* sizeof (struct bidi_it
)
720 + sizeof (bidi_cache_start_stack
),
721 sizeof (bidi_cache_sp
));
722 memcpy (&bidi_cache_start
,
723 p
+ sizeof (bidi_cache_idx
)
724 + bidi_cache_idx
* sizeof (struct bidi_it
)
725 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
),
726 sizeof (bidi_cache_start
));
727 memcpy (&bidi_cache_last_idx
,
728 p
+ sizeof (bidi_cache_idx
)
729 + bidi_cache_idx
* sizeof (struct bidi_it
)
730 + sizeof (bidi_cache_start_stack
) + sizeof (bidi_cache_sp
)
731 + sizeof (bidi_cache_start
),
732 sizeof (bidi_cache_last_idx
));
733 bidi_cache_total_alloc
-=
734 bidi_shelve_header_size
+ bidi_cache_idx
* sizeof (struct bidi_it
);
742 /***********************************************************************
744 ***********************************************************************/
746 bidi_initialize (void)
748 bidi_type_table
= uniprop_table (intern ("bidi-class"));
749 if (NILP (bidi_type_table
))
751 staticpro (&bidi_type_table
);
753 bidi_mirror_table
= uniprop_table (intern ("mirroring"));
754 if (NILP (bidi_mirror_table
))
756 staticpro (&bidi_mirror_table
);
758 Qparagraph_start
= intern ("paragraph-start");
759 staticpro (&Qparagraph_start
);
760 paragraph_start_re
= Fsymbol_value (Qparagraph_start
);
761 if (!STRINGP (paragraph_start_re
))
762 paragraph_start_re
= build_string ("\f\\|[ \t]*$");
763 staticpro (&paragraph_start_re
);
764 Qparagraph_separate
= intern ("paragraph-separate");
765 staticpro (&Qparagraph_separate
);
766 paragraph_separate_re
= Fsymbol_value (Qparagraph_separate
);
767 if (!STRINGP (paragraph_separate_re
))
768 paragraph_separate_re
= build_string ("[ \t\f]*$");
769 staticpro (&paragraph_separate_re
);
772 bidi_cache_total_alloc
= 0;
774 bidi_initialized
= 1;
777 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
780 bidi_set_paragraph_end (struct bidi_it
*bidi_it
)
782 bidi_it
->invalid_levels
= 0;
783 bidi_it
->invalid_rl_levels
= -1;
784 bidi_it
->stack_idx
= 0;
785 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
788 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
790 bidi_init_it (EMACS_INT charpos
, EMACS_INT bytepos
, int frame_window_p
,
791 struct bidi_it
*bidi_it
)
793 if (! bidi_initialized
)
796 bidi_it
->charpos
= charpos
;
798 bidi_it
->bytepos
= bytepos
;
799 bidi_it
->frame_window_p
= frame_window_p
;
800 bidi_it
->nchars
= -1; /* to be computed in bidi_resolve_explicit_1 */
801 bidi_it
->first_elt
= 1;
802 bidi_set_paragraph_end (bidi_it
);
803 bidi_it
->new_paragraph
= 1;
804 bidi_it
->separator_limit
= -1;
805 bidi_it
->type
= NEUTRAL_B
;
806 bidi_it
->type_after_w1
= NEUTRAL_B
;
807 bidi_it
->orig_type
= NEUTRAL_B
;
808 bidi_it
->prev_was_pdf
= 0;
809 bidi_it
->prev
.type
= bidi_it
->prev
.type_after_w1
=
810 bidi_it
->prev
.orig_type
= UNKNOWN_BT
;
811 bidi_it
->last_strong
.type
= bidi_it
->last_strong
.type_after_w1
=
812 bidi_it
->last_strong
.orig_type
= UNKNOWN_BT
;
813 bidi_it
->next_for_neutral
.charpos
= -1;
814 bidi_it
->next_for_neutral
.type
=
815 bidi_it
->next_for_neutral
.type_after_w1
=
816 bidi_it
->next_for_neutral
.orig_type
= UNKNOWN_BT
;
817 bidi_it
->prev_for_neutral
.charpos
= -1;
818 bidi_it
->prev_for_neutral
.type
=
819 bidi_it
->prev_for_neutral
.type_after_w1
=
820 bidi_it
->prev_for_neutral
.orig_type
= UNKNOWN_BT
;
821 bidi_it
->sor
= L2R
; /* FIXME: should it be user-selectable? */
822 bidi_it
->disp_pos
= -1; /* invalid/unknown */
823 bidi_it
->disp_prop_p
= 0;
824 /* We can only shrink the cache if we are at the bottom level of its
826 if (bidi_cache_start
== 0)
827 bidi_cache_shrink ();
832 /* Perform initializations for reordering a new line of bidi text. */
834 bidi_line_init (struct bidi_it
*bidi_it
)
836 bidi_it
->scan_dir
= 1; /* FIXME: do we need to have control on this? */
837 bidi_it
->resolved_level
= bidi_it
->level_stack
[0].level
;
838 bidi_it
->level_stack
[0].override
= NEUTRAL_DIR
; /* X1 */
839 bidi_it
->invalid_levels
= 0;
840 bidi_it
->invalid_rl_levels
= -1;
841 bidi_it
->next_en_pos
= -1;
842 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
843 bidi_set_sor_type (bidi_it
,
844 bidi_it
->paragraph_dir
== R2L
? 1 : 0,
845 bidi_it
->level_stack
[0].level
); /* X10 */
851 /***********************************************************************
853 ***********************************************************************/
855 /* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
856 are zero-based character positions in S, BEGBYTE is byte position
857 corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
859 static inline EMACS_INT
860 bidi_count_bytes (const unsigned char *s
, const EMACS_INT beg
,
861 const EMACS_INT begbyte
, const EMACS_INT end
, int unibyte
)
864 const unsigned char *p
= s
+ begbyte
, *start
= p
;
870 if (!CHAR_HEAD_P (*p
))
875 p
+= BYTES_BY_CHAR_HEAD (*p
);
883 /* Fetch and return the character at byte position BYTEPOS. If S is
884 non-NULL, fetch the character from string S; otherwise fetch the
885 character from the current buffer. UNIBYTE non-zero means S is a
888 bidi_char_at_pos (EMACS_INT bytepos
, const unsigned char *s
, int unibyte
)
895 return STRING_CHAR (s
+ bytepos
);
898 return FETCH_MULTIBYTE_CHAR (bytepos
);
901 /* Fetch and return the character at BYTEPOS/CHARPOS. If that
902 character is covered by a display string, treat the entire run of
903 covered characters as a single character u+FFFC, and return their
904 combined length in CH_LEN and NCHARS. DISP_POS specifies the
905 character position of the next display string, or -1 if not yet
906 computed. DISP_PROP_P non-zero means that there's really a display
907 string at DISP_POS, as opposed to when we searched till DISP_POS
908 without finding one. When the next character is at or beyond that
909 position, the function updates DISP_POS with the position of the
910 next display string. STRING->s is the C string to iterate, or NULL
911 if iterating over a buffer or a Lisp string; in the latter case,
912 STRING->lstring is the Lisp string. */
914 bidi_fetch_char (EMACS_INT bytepos
, EMACS_INT charpos
, EMACS_INT
*disp_pos
,
915 int *disp_prop_p
, struct bidi_string_data
*string
,
916 int frame_window_p
, EMACS_INT
*ch_len
, EMACS_INT
*nchars
)
920 (string
->s
|| STRINGP (string
->lstring
)) ? string
->schars
: ZV
;
923 /* If we got past the last known position of display string, compute
924 the position of the next one. That position could be at CHARPOS. */
925 if (charpos
< endpos
&& charpos
> *disp_pos
)
927 SET_TEXT_POS (pos
, charpos
, bytepos
);
928 *disp_pos
= compute_display_string_pos (&pos
, string
, frame_window_p
,
932 /* Fetch the character at BYTEPOS. */
933 if (charpos
>= endpos
)
941 else if (charpos
>= *disp_pos
&& *disp_prop_p
)
943 EMACS_INT disp_end_pos
;
945 /* We don't expect to find ourselves in the middle of a display
946 property. Hopefully, it will never be needed. */
947 if (charpos
> *disp_pos
)
949 /* Return the Unicode Object Replacement Character to represent
950 the entire run of characters covered by the display string. */
952 disp_end_pos
= compute_display_string_end (*disp_pos
, string
);
953 *nchars
= disp_end_pos
- *disp_pos
;
957 *ch_len
= bidi_count_bytes (string
->s
, *disp_pos
, bytepos
,
958 disp_end_pos
, string
->unibyte
);
959 else if (STRINGP (string
->lstring
))
960 *ch_len
= bidi_count_bytes (SDATA (string
->lstring
), *disp_pos
,
961 bytepos
, disp_end_pos
, string
->unibyte
);
963 *ch_len
= CHAR_TO_BYTE (disp_end_pos
) - bytepos
;
971 if (!string
->unibyte
)
973 ch
= STRING_CHAR_AND_LENGTH (string
->s
+ bytepos
, len
);
978 ch
= UNIBYTE_TO_CHAR (string
->s
[bytepos
]);
982 else if (STRINGP (string
->lstring
))
986 if (!string
->unibyte
)
988 ch
= STRING_CHAR_AND_LENGTH (SDATA (string
->lstring
) + bytepos
,
994 ch
= UNIBYTE_TO_CHAR (SREF (string
->lstring
, bytepos
));
1000 ch
= FETCH_MULTIBYTE_CHAR (bytepos
);
1001 *ch_len
= CHAR_BYTES (ch
);
1006 /* If we just entered a run of characters covered by a display
1007 string, compute the position of the next display string. */
1008 if (charpos
+ *nchars
<= endpos
&& charpos
+ *nchars
> *disp_pos
1011 SET_TEXT_POS (pos
, charpos
+ *nchars
, bytepos
+ *ch_len
);
1012 *disp_pos
= compute_display_string_pos (&pos
, string
, frame_window_p
,
1020 /***********************************************************************
1021 Determining paragraph direction
1022 ***********************************************************************/
1024 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1025 Value is the non-negative length of the paragraph separator
1026 following the buffer position, -1 if position is at the beginning
1027 of a new paragraph, or -2 if position is neither at beginning nor
1028 at end of a paragraph. */
1030 bidi_at_paragraph_end (EMACS_INT charpos
, EMACS_INT bytepos
)
1033 Lisp_Object start_re
;
1036 sep_re
= paragraph_separate_re
;
1037 start_re
= paragraph_start_re
;
1039 val
= fast_looking_at (sep_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
);
1042 if (fast_looking_at (start_re
, charpos
, bytepos
, ZV
, ZV_BYTE
, Qnil
) >= 0)
1051 /* Find the beginning of this paragraph by looking back in the buffer.
1052 Value is the byte position of the paragraph's beginning. */
1054 bidi_find_paragraph_start (EMACS_INT pos
, EMACS_INT pos_byte
)
1056 Lisp_Object re
= paragraph_start_re
;
1057 EMACS_INT limit
= ZV
, limit_byte
= ZV_BYTE
;
1059 while (pos_byte
> BEGV_BYTE
1060 && fast_looking_at (re
, pos
, pos_byte
, limit
, limit_byte
, Qnil
) < 0)
1062 /* FIXME: What if the paragraph beginning is covered by a
1063 display string? And what if a display string covering some
1064 of the text over which we scan back includes
1065 paragraph_start_re? */
1066 pos
= find_next_newline_no_quit (pos
- 1, -1);
1067 pos_byte
= CHAR_TO_BYTE (pos
);
1072 /* Determine the base direction, a.k.a. base embedding level, of the
1073 paragraph we are about to iterate through. If DIR is either L2R or
1074 R2L, just use that. Otherwise, determine the paragraph direction
1075 from the first strong directional character of the paragraph.
1077 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
1078 has no strong directional characters and both DIR and
1079 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1080 in the buffer until a paragraph is found with a strong character,
1081 or until hitting BEGV. In the latter case, fall back to L2R. This
1082 flag is used in current-bidi-paragraph-direction.
1084 Note that this function gives the paragraph separator the same
1085 direction as the preceding paragraph, even though Emacs generally
1086 views the separator as not belonging to any paragraph. */
1088 bidi_paragraph_init (bidi_dir_t dir
, struct bidi_it
*bidi_it
, int no_default_p
)
1090 EMACS_INT bytepos
= bidi_it
->bytepos
;
1091 int string_p
= bidi_it
->string
.s
!= NULL
|| STRINGP (bidi_it
->string
.lstring
);
1092 EMACS_INT pstartbyte
;
1093 /* Note that begbyte is a byte position, while end is a character
1094 position. Yes, this is ugly, but we are trying to avoid costly
1095 calls to BYTE_TO_CHAR and its ilk. */
1096 EMACS_INT begbyte
= string_p
? 0 : BEGV_BYTE
;
1097 EMACS_INT end
= string_p
? bidi_it
->string
.schars
: ZV
;
1099 /* Special case for an empty buffer. */
1100 if (bytepos
== begbyte
&& bidi_it
->charpos
== end
)
1102 /* We should never be called at EOB or before BEGV. */
1103 else if (bidi_it
->charpos
>= end
|| bytepos
< begbyte
)
1108 bidi_it
->paragraph_dir
= L2R
;
1109 bidi_it
->new_paragraph
= 0;
1111 else if (dir
== R2L
)
1113 bidi_it
->paragraph_dir
= R2L
;
1114 bidi_it
->new_paragraph
= 0;
1116 else if (dir
== NEUTRAL_DIR
) /* P2 */
1119 EMACS_INT ch_len
, nchars
;
1120 EMACS_INT pos
, disp_pos
= -1;
1121 int disp_prop_p
= 0;
1123 const unsigned char *s
;
1125 if (!bidi_initialized
)
1128 /* If we are inside a paragraph separator, we are just waiting
1129 for the separator to be exhausted; use the previous paragraph
1130 direction. But don't do that if we have been just reseated,
1131 because we need to reinitialize below in that case. */
1132 if (!bidi_it
->first_elt
1133 && bidi_it
->charpos
< bidi_it
->separator_limit
)
1136 /* If we are on a newline, get past it to where the next
1137 paragraph might start. But don't do that at BEGV since then
1138 we are potentially in a new paragraph that doesn't yet
1140 pos
= bidi_it
->charpos
;
1141 s
= STRINGP (bidi_it
->string
.lstring
) ?
1142 SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1143 if (bytepos
> begbyte
1144 && bidi_char_at_pos (bytepos
, s
, bidi_it
->string
.unibyte
) == '\n')
1150 /* We are either at the beginning of a paragraph or in the
1151 middle of it. Find where this paragraph starts. */
1154 /* We don't support changes of paragraph direction inside a
1155 string. It is treated as a single paragraph. */
1159 pstartbyte
= bidi_find_paragraph_start (pos
, bytepos
);
1160 bidi_it
->separator_limit
= -1;
1161 bidi_it
->new_paragraph
= 0;
1163 /* The following loop is run more than once only if NO_DEFAULT_P
1164 is non-zero, and only if we are iterating on a buffer. */
1166 bytepos
= pstartbyte
;
1168 pos
= BYTE_TO_CHAR (bytepos
);
1169 ch
= bidi_fetch_char (bytepos
, pos
, &disp_pos
, &disp_prop_p
,
1171 bidi_it
->frame_window_p
, &ch_len
, &nchars
);
1172 type
= bidi_get_type (ch
, NEUTRAL_DIR
);
1174 for (pos
+= nchars
, bytepos
+= ch_len
;
1175 (bidi_get_category (type
) != STRONG
)
1176 || (bidi_ignore_explicit_marks_for_paragraph_level
1177 && (type
== RLE
|| type
== RLO
1178 || type
== LRE
|| type
== LRO
));
1179 type
= bidi_get_type (ch
, NEUTRAL_DIR
))
1183 /* Pretend there's a paragraph separator at end of
1189 && type
== NEUTRAL_B
1190 && bidi_at_paragraph_end (pos
, bytepos
) >= -1)
1192 /* Fetch next character and advance to get past it. */
1193 ch
= bidi_fetch_char (bytepos
, pos
, &disp_pos
,
1194 &disp_prop_p
, &bidi_it
->string
,
1195 bidi_it
->frame_window_p
, &ch_len
, &nchars
);
1199 if ((type
== STRONG_R
|| type
== STRONG_AL
) /* P3 */
1200 || (!bidi_ignore_explicit_marks_for_paragraph_level
1201 && (type
== RLO
|| type
== RLE
)))
1202 bidi_it
->paragraph_dir
= R2L
;
1203 else if (type
== STRONG_L
1204 || (!bidi_ignore_explicit_marks_for_paragraph_level
1205 && (type
== LRO
|| type
== LRE
)))
1206 bidi_it
->paragraph_dir
= L2R
;
1208 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
)
1210 /* If this paragraph is at BEGV, default to L2R. */
1211 if (pstartbyte
== BEGV_BYTE
)
1212 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 */
1215 EMACS_INT prevpbyte
= pstartbyte
;
1216 EMACS_INT p
= BYTE_TO_CHAR (pstartbyte
), pbyte
= pstartbyte
;
1218 /* Find the beginning of the previous paragraph, if any. */
1219 while (pbyte
> BEGV_BYTE
&& prevpbyte
>= pstartbyte
)
1221 /* FXIME: What if p is covered by a display
1222 string? See also a FIXME inside
1223 bidi_find_paragraph_start. */
1225 pbyte
= CHAR_TO_BYTE (p
);
1226 prevpbyte
= bidi_find_paragraph_start (p
, pbyte
);
1228 pstartbyte
= prevpbyte
;
1232 && no_default_p
&& bidi_it
->paragraph_dir
== NEUTRAL_DIR
);
1237 /* Contrary to UAX#9 clause P3, we only default the paragraph
1238 direction to L2R if we have no previous usable paragraph
1239 direction. This is allowed by the HL1 clause. */
1240 if (bidi_it
->paragraph_dir
!= L2R
&& bidi_it
->paragraph_dir
!= R2L
)
1241 bidi_it
->paragraph_dir
= L2R
; /* P3 and HL1 ``higher-level protocols'' */
1242 if (bidi_it
->paragraph_dir
== R2L
)
1243 bidi_it
->level_stack
[0].level
= 1;
1245 bidi_it
->level_stack
[0].level
= 0;
1247 bidi_line_init (bidi_it
);
1251 /***********************************************************************
1252 Resolving explicit and implicit levels.
1253 The rest of this file constitutes the core of the UBA implementation.
1254 ***********************************************************************/
/* Return non-zero if the bidi type of character CH, as looked up in
   bidi_type_table, is one of the explicit directional formatting
   codes (LRE, LRO, RLE, RLO, ...).  */
1257 bidi_explicit_dir_char (int ch
)
1259 bidi_type_t ch_type
;
1261 if (!bidi_initialized
)
1263 ch_type
= (bidi_type_t
) XINT (CHAR_TABLE_REF (bidi_type_table
, ch
));
1264 return (ch_type
== LRE
|| ch_type
== LRO
1265 || ch_type
== RLE
|| ch_type
== RLO
1269 /* A helper function for bidi_resolve_explicit. It advances to the
1270 next character in logical order and determines the new embedding
1271 level and directional override, but does not take into account
1272 empty embeddings. */
/* The fetched character is stored in BIDI_IT->ch, its unmodified bidi
   type in BIDI_IT->orig_type, and the type after explicit resolution
   in BIDI_IT->type.  The caller, bidi_resolve_explicit, treats the
   return value as the new embedding level.  */
1274 bidi_resolve_explicit_1 (struct bidi_it
*bidi_it
)
1280 bidi_dir_t override
;
1281 int string_p
= bidi_it
->string
.s
!= NULL
|| STRINGP (bidi_it
->string
.lstring
);
1283 /* If reseat()'ed, don't advance, so as to start iteration from the
1284 position where we were reseated. bidi_it->bytepos can be less
1285 than BEGV_BYTE after reseat to BEGV. */
1286 if (bidi_it
->bytepos
< (string_p
? 0 : BEGV_BYTE
)
1287 || bidi_it
->first_elt
)
1289 bidi_it
->first_elt
= 0;
1292 const unsigned char *p
=
1293 STRINGP (bidi_it
->string
.lstring
)
1294 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1296 if (bidi_it
->charpos
< 0)
1297 bidi_it
->charpos
= 0;
1298 bidi_it
->bytepos
= bidi_count_bytes (p
, 0, 0, bidi_it
->charpos
,
1299 bidi_it
->string
.unibyte
);
1303 if (bidi_it
->charpos
< BEGV
)
1304 bidi_it
->charpos
= BEGV
;
1305 bidi_it
->bytepos
= CHAR_TO_BYTE (bidi_it
->charpos
);
1308 /* Don't move at end of buffer/string. */
1309 else if (bidi_it
->charpos
< (string_p
? bidi_it
->string
.schars
: ZV
))
1311 /* Advance to the next character, skipping characters covered by
1312 display strings (nchars > 1). */
1313 if (bidi_it
->nchars
<= 0)
1315 bidi_it
->charpos
+= bidi_it
->nchars
;
1316 if (bidi_it
->ch_len
== 0)
1318 bidi_it
->bytepos
+= bidi_it
->ch_len
;
1321 current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
; /* X1 */
1322 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1323 new_level
= current_level
;
1325 if (bidi_it
->charpos
>= (string_p
? bidi_it
->string
.schars
: ZV
))
1328 bidi_it
->ch_len
= 1;
1329 bidi_it
->nchars
= 1;
1330 bidi_it
->disp_pos
= (string_p
? bidi_it
->string
.schars
: ZV
);
1331 bidi_it
->disp_prop_p
= 0;
1335 /* Fetch the character at BYTEPOS. If it is covered by a
1336 display string, treat the entire run of covered characters as
1337 a single character u+FFFC. */
1338 curchar
= bidi_fetch_char (bidi_it
->bytepos
, bidi_it
->charpos
,
1339 &bidi_it
->disp_pos
, &bidi_it
->disp_prop_p
,
1340 &bidi_it
->string
, bidi_it
->frame_window_p
,
1341 &bidi_it
->ch_len
, &bidi_it
->nchars
);
1343 bidi_it
->ch
= curchar
;
1345 /* Don't apply directional override here, as all the types we handle
1346 below will not be affected by the override anyway, and we need
1347 the original type unaltered. The override will be applied in
1348 bidi_resolve_weak. */
1349 type
= bidi_get_type (curchar
, NEUTRAL_DIR
);
1350 bidi_it
->orig_type
= type
;
1351 bidi_check_type (bidi_it
->orig_type
);
1354 bidi_it
->prev_was_pdf
= 0;
1356 bidi_it
->type_after_w1
= UNKNOWN_BT
;
1362 bidi_it
->type_after_w1
= type
;
1363 bidi_check_type (bidi_it
->type_after_w1
);
1364 type
= WEAK_BN
; /* X9/Retaining */
1365 if (bidi_it
->ignore_bn_limit
<= -1)
1367 if (current_level
<= BIDI_MAXLEVEL
- 4)
1369 /* Compute the least odd embedding level greater than
1370 the current level. */
1371 new_level
= ((current_level
+ 1) & ~1) + 1;
1372 if (bidi_it
->type_after_w1
== RLE
)
1373 override
= NEUTRAL_DIR
;
1376 if (current_level
== BIDI_MAXLEVEL
- 4)
1377 bidi_it
->invalid_rl_levels
= 0;
1378 bidi_push_embedding_level (bidi_it
, new_level
, override
);
1382 bidi_it
->invalid_levels
++;
1383 /* See the commentary about invalid_rl_levels below. */
1384 if (bidi_it
->invalid_rl_levels
< 0)
1385 bidi_it
->invalid_rl_levels
= 0;
1386 bidi_it
->invalid_rl_levels
++;
1389 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1390 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1395 bidi_it
->type_after_w1
= type
;
1396 bidi_check_type (bidi_it
->type_after_w1
);
1397 type
= WEAK_BN
; /* X9/Retaining */
1398 if (bidi_it
->ignore_bn_limit
<= -1)
1400 if (current_level
<= BIDI_MAXLEVEL
- 5)
1402 /* Compute the least even embedding level greater than
1403 the current level. */
1404 new_level
= ((current_level
+ 2) & ~1);
1405 if (bidi_it
->type_after_w1
== LRE
)
1406 override
= NEUTRAL_DIR
;
1409 bidi_push_embedding_level (bidi_it
, new_level
, override
);
1413 bidi_it
->invalid_levels
++;
1414 /* invalid_rl_levels counts invalid levels encountered
1415 while the embedding level was already too high for
1416 LRE/LRO, but not for RLE/RLO. That is because
1417 there may be exactly one PDF which we should not
1418 ignore even though invalid_levels is non-zero.
1419 invalid_rl_levels helps to know what PDF is
1421 if (bidi_it
->invalid_rl_levels
>= 0)
1422 bidi_it
->invalid_rl_levels
++;
1425 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1426 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1430 bidi_it
->type_after_w1
= type
;
1431 bidi_check_type (bidi_it
->type_after_w1
);
1432 type
= WEAK_BN
; /* X9/Retaining */
1433 if (bidi_it
->ignore_bn_limit
<= -1)
1435 if (!bidi_it
->invalid_rl_levels
)
1437 new_level
= bidi_pop_embedding_level (bidi_it
);
1438 bidi_it
->invalid_rl_levels
= -1;
1439 if (bidi_it
->invalid_levels
)
1440 bidi_it
->invalid_levels
--;
1441 /* else nothing: UAX#9 says to ignore invalid PDFs */
1443 if (!bidi_it
->invalid_levels
)
1444 new_level
= bidi_pop_embedding_level (bidi_it
);
1447 bidi_it
->invalid_levels
--;
1448 bidi_it
->invalid_rl_levels
--;
1451 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* W5/Retaining */
1452 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1460 bidi_it
->type
= type
;
1461 bidi_check_type (bidi_it
->type
);
1466 /* Given an iterator state in BIDI_IT, advance one character position
1467 in the buffer/string to the next character (in the logical order),
1468 resolve any explicit embeddings and directional overrides, and
1469 return the embedding level of the character after resolving
1470 explicit directives and ignoring empty embeddings. */
/* Empty embeddings are detected by looking ahead with
   bidi_resolve_explicit_1 while the following characters are explicit
   directional codes.  If the level comes back to the previous level,
   the embedding is empty and BIDI_IT->ignore_bn_limit records the
   character position where the look-ahead stopped; otherwise it is
   set to -2 to mark a non-empty embedding.  */
1472 bidi_resolve_explicit (struct bidi_it
*bidi_it
)
1474 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1475 int new_level
= bidi_resolve_explicit_1 (bidi_it
);
1476 EMACS_INT eob
= bidi_it
->string
.s
? bidi_it
->string
.schars
: ZV
;
1477 const unsigned char *s
= STRINGP (bidi_it
->string
.lstring
)
1478 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1480 if (prev_level
< new_level
1481 && bidi_it
->type
== WEAK_BN
1482 && bidi_it
->ignore_bn_limit
== -1 /* only if not already known */
1483 && bidi_it
->charpos
< eob
/* not already at EOB */
1484 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1485 + bidi_it
->ch_len
, s
,
1486 bidi_it
->string
.unibyte
)))
1488 /* Avoid pushing and popping embedding levels if the level run
1489 is empty, as this breaks level runs where it shouldn't.
1490 UAX#9 removes all the explicit embedding and override codes,
1491 so empty embeddings disappear without a trace. We need to
1492 behave as if we did the same. */
1493 struct bidi_it saved_it
;
1494 int level
= prev_level
;
1496 bidi_copy_it (&saved_it
, bidi_it
);
1498 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it
->bytepos
1499 + bidi_it
->ch_len
, s
,
1500 bidi_it
->string
.unibyte
)))
1502 /* This advances to the next character, skipping any
1503 characters covered by display strings. */
1504 level
= bidi_resolve_explicit_1 (bidi_it
);
1505 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1506 a pointer to its data is no longer valid. */
1507 if (STRINGP (bidi_it
->string
.lstring
))
1508 s
= SDATA (bidi_it
->string
.lstring
);
1511 if (bidi_it
->nchars
<= 0)
1513 if (level
== prev_level
) /* empty embedding */
1514 saved_it
.ignore_bn_limit
= bidi_it
->charpos
+ bidi_it
->nchars
;
1515 else /* this embedding is non-empty */
1516 saved_it
.ignore_bn_limit
= -2;
1518 bidi_copy_it (bidi_it
, &saved_it
);
1519 if (bidi_it
->ignore_bn_limit
> -1)
1521 /* We pushed a level, but we shouldn't have. Undo that. */
1522 if (!bidi_it
->invalid_rl_levels
)
1524 new_level
= bidi_pop_embedding_level (bidi_it
);
1525 bidi_it
->invalid_rl_levels
= -1;
1526 if (bidi_it
->invalid_levels
)
1527 bidi_it
->invalid_levels
--;
1529 if (!bidi_it
->invalid_levels
)
1530 new_level
= bidi_pop_embedding_level (bidi_it
);
1533 bidi_it
->invalid_levels
--;
1534 bidi_it
->invalid_rl_levels
--;
1539 if (bidi_it
->type
== NEUTRAL_B
) /* X8 */
1541 bidi_set_paragraph_end (bidi_it
);
1542 /* This is needed by bidi_resolve_weak below, and in L1. */
1543 bidi_it
->type_after_w1
= bidi_it
->type
;
1544 bidi_check_type (bidi_it
->type_after_w1
);
1550 /* Advance in the buffer/string, resolve weak types and return the
1551 type of the next character after weak type resolution. */
/* Explicit resolution is delegated to bidi_resolve_explicit.  The type
   reached before clause W7 is kept in BIDI_IT->type_after_w1 (unless
   a value was already recorded there), and the final type is stored
   in BIDI_IT->type.  */
1553 bidi_resolve_weak (struct bidi_it
*bidi_it
)
1556 bidi_dir_t override
;
1557 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1558 int new_level
= bidi_resolve_explicit (bidi_it
);
1560 bidi_type_t type_of_next
;
1561 struct bidi_it saved_it
;
1563 (STRINGP (bidi_it
->string
.lstring
) || bidi_it
->string
.s
)
1564 ? bidi_it
->string
.schars
: ZV
;
1566 type
= bidi_it
->type
;
1567 override
= bidi_it
->level_stack
[bidi_it
->stack_idx
].override
;
1569 if (type
== UNKNOWN_BT
1577 if (new_level
!= prev_level
1578 || bidi_it
->type
== NEUTRAL_B
)
1580 /* We've got a new embedding level run, compute the directional
1581 type of sor and initialize per-run variables (UAX#9, clause
1583 bidi_set_sor_type (bidi_it
, prev_level
, new_level
);
1585 else if (type
== NEUTRAL_S
|| type
== NEUTRAL_WS
1586 || type
== WEAK_BN
|| type
== STRONG_AL
)
1587 bidi_it
->type_after_w1
= type
; /* needed in L1 */
1588 bidi_check_type (bidi_it
->type_after_w1
);
1590 /* Level and directional override status are already recorded in
1591 bidi_it, and do not need any change; see X6. */
1592 if (override
== R2L
) /* X6 */
1594 else if (override
== L2R
)
1598 if (type
== WEAK_NSM
) /* W1 */
1600 /* Note that we don't need to consider the case where the
1601 prev character has its type overridden by an RLO or LRO,
1602 because then either the type of this NSM would have been
1603 also overridden, or the previous character is outside the
1604 current level run, and thus not relevant to this NSM.
1605 This is why NSM gets the type_after_w1 of the previous
1607 if (bidi_it
->prev
.type_after_w1
!= UNKNOWN_BT
1608 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1609 && bidi_it
->prev
.type_after_w1
!= NEUTRAL_B
)
1610 type
= bidi_it
->prev
.type_after_w1
;
1611 else if (bidi_it
->sor
== R2L
)
1613 else if (bidi_it
->sor
== L2R
)
1615 else /* shouldn't happen! */
1618 if (type
== WEAK_EN
/* W2 */
1619 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)
1621 else if (type
== STRONG_AL
) /* W3 */
1623 else if ((type
== WEAK_ES
/* W4 */
1624 && bidi_it
->prev
.type_after_w1
== WEAK_EN
1625 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1627 && ((bidi_it
->prev
.type_after_w1
== WEAK_EN
1628 && bidi_it
->prev
.orig_type
== WEAK_EN
)
1629 || bidi_it
->prev
.type_after_w1
== WEAK_AN
)))
1631 const unsigned char *s
=
1632 STRINGP (bidi_it
->string
.lstring
)
1633 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1636 bidi_it
->charpos
+ bidi_it
->nchars
>= eob
1638 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
,
1639 bidi_it
->string
.unibyte
);
1640 type_of_next
= bidi_get_type (next_char
, override
);
1642 if (type_of_next
== WEAK_BN
1643 || bidi_explicit_dir_char (next_char
))
1645 bidi_copy_it (&saved_it
, bidi_it
);
1646 while (bidi_resolve_explicit (bidi_it
) == new_level
1647 && bidi_it
->type
== WEAK_BN
)
1649 type_of_next
= bidi_it
->type
;
1650 bidi_copy_it (bidi_it
, &saved_it
);
1653 /* If the next character is EN, but the last strong-type
1654 character is AL, that next EN will be changed to AN when
1655 we process it in W2 above. So in that case, this ES
1656 should not be changed into EN. */
1658 && type_of_next
== WEAK_EN
1659 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1661 else if (type
== WEAK_CS
)
1663 if (bidi_it
->prev
.type_after_w1
== WEAK_AN
1664 && (type_of_next
== WEAK_AN
1665 /* If the next character is EN, but the last
1666 strong-type character is AL, EN will be later
1667 changed to AN when we process it in W2 above.
1668 So in that case, this ES should not be
1670 || (type_of_next
== WEAK_EN
1671 && bidi_it
->last_strong
.type_after_w1
== STRONG_AL
)))
1673 else if (bidi_it
->prev
.type_after_w1
== WEAK_EN
1674 && type_of_next
== WEAK_EN
1675 && bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1679 else if (type
== WEAK_ET
/* W5: ET with EN before or after it */
1680 || type
== WEAK_BN
) /* W5/Retaining */
1682 if (bidi_it
->prev
.type_after_w1
== WEAK_EN
/* ET/BN w/EN before it */
1683 || bidi_it
->next_en_pos
> bidi_it
->charpos
)
1685 else /* W5: ET/BN with EN after it. */
1687 EMACS_INT en_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
1688 const unsigned char *s
=
1689 STRINGP (bidi_it
->string
.lstring
)
1690 ? SDATA (bidi_it
->string
.lstring
) : bidi_it
->string
.s
;
1692 if (bidi_it
->nchars
<= 0)
1695 bidi_it
->charpos
+ bidi_it
->nchars
>= eob
1697 : bidi_char_at_pos (bidi_it
->bytepos
+ bidi_it
->ch_len
, s
,
1698 bidi_it
->string
.unibyte
);
1699 type_of_next
= bidi_get_type (next_char
, override
);
1701 if (type_of_next
== WEAK_ET
1702 || type_of_next
== WEAK_BN
1703 || bidi_explicit_dir_char (next_char
))
1705 bidi_copy_it (&saved_it
, bidi_it
);
1706 while (bidi_resolve_explicit (bidi_it
) == new_level
1707 && (bidi_it
->type
== WEAK_BN
1708 || bidi_it
->type
== WEAK_ET
))
1710 type_of_next
= bidi_it
->type
;
1711 en_pos
= bidi_it
->charpos
;
1712 bidi_copy_it (bidi_it
, &saved_it
);
1714 if (type_of_next
== WEAK_EN
)
1716 /* If the last strong character is AL, the EN we've
1717 found will become AN when we get to it (W2). */
1718 if (bidi_it
->last_strong
.type_after_w1
!= STRONG_AL
)
1721 /* Remember this EN position, to speed up processing
1723 bidi_it
->next_en_pos
= en_pos
;
1725 else if (type
== WEAK_BN
)
1726 type
= NEUTRAL_ON
; /* W6/Retaining */
1732 if (type
== WEAK_ES
|| type
== WEAK_ET
|| type
== WEAK_CS
/* W6 */
1734 && (bidi_it
->prev
.type_after_w1
== WEAK_CS
/* W6/Retaining */
1735 || bidi_it
->prev
.type_after_w1
== WEAK_ES
1736 || bidi_it
->prev
.type_after_w1
== WEAK_ET
)))
1739 /* Store the type we've got so far, before we clobber it with strong
1740 types in W7 and while resolving neutral types. But leave alone
1741 the original types that were recorded above, because we will need
1742 them for the L1 clause. */
1743 if (bidi_it
->type_after_w1
== UNKNOWN_BT
)
1744 bidi_it
->type_after_w1
= type
;
1745 bidi_check_type (bidi_it
->type_after_w1
);
1747 if (type
== WEAK_EN
) /* W7 */
1749 if ((bidi_it
->last_strong
.type_after_w1
== STRONG_L
)
1750 || (bidi_it
->last_strong
.type
== UNKNOWN_BT
&& bidi_it
->sor
== L2R
))
1754 bidi_it
->type
= type
;
1755 bidi_check_type (bidi_it
->type
);
1759 /* Resolve the type of a neutral character according to the type of
1760 surrounding strong text and the current embedding level. */
/* PREV_TYPE and NEXT_TYPE are the types of the text before and after
   the neutral; EN and AN in either of them are first replaced by
   STRONG_R.  LEV is the embedding level, whose parity is tested for
   clause N2 when the two types differ.  */
1761 static inline bidi_type_t
1762 bidi_resolve_neutral_1 (bidi_type_t prev_type
, bidi_type_t next_type
, int lev
)
1764 /* N1: European and Arabic numbers are treated as though they were R. */
1765 if (next_type
== WEAK_EN
|| next_type
== WEAK_AN
)
1766 next_type
= STRONG_R
;
1767 if (prev_type
== WEAK_EN
|| prev_type
== WEAK_AN
)
1768 prev_type
= STRONG_R
;
1770 if (next_type
== prev_type
) /* N1 */
1772 else if ((lev
& 1) == 0) /* N2 */
/* Advance to the next character with bidi_resolve_weak and, if its
   type is neutral (or it is a BN whose level did not change), resolve
   it with bidi_resolve_neutral_1.  When BIDI_IT->next_for_neutral is
   already known, it is used directly.  Otherwise the text is scanned
   forward, caching the iterator states, until a non-neutral character
   or the end of the level run is found; BIDI_IT is then restored from
   a saved copy that carries the resolved type.  */
1779 bidi_resolve_neutral (struct bidi_it
*bidi_it
)
1781 int prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1782 bidi_type_t type
= bidi_resolve_weak (bidi_it
);
1783 int current_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1785 if (!(type
== STRONG_R
1790 || type
== NEUTRAL_B
1791 || type
== NEUTRAL_S
1792 || type
== NEUTRAL_WS
1793 || type
== NEUTRAL_ON
))
1796 if (bidi_get_category (type
) == NEUTRAL
1797 || (type
== WEAK_BN
&& prev_level
== current_level
))
1799 if (bidi_it
->next_for_neutral
.type
!= UNKNOWN_BT
)
1800 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
1801 bidi_it
->next_for_neutral
.type
,
1805 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1806 the assumption of batch-style processing; see clauses W4,
1807 W5, and especially N1, which require to look far forward
1808 (as well as back) in the buffer/string. May the fleas of
1809 a thousand camels infest the armpits of those who design
1810 supposedly general-purpose algorithms by looking at their
1811 own implementations, and fail to consider other possible
1813 struct bidi_it saved_it
;
1814 bidi_type_t next_type
;
1816 if (bidi_it
->scan_dir
== -1)
1819 bidi_copy_it (&saved_it
, bidi_it
);
1820 /* Scan the text forward until we find the first non-neutral
1821 character, and then use that to resolve the neutral we
1822 are dealing with now. We also cache the scanned iterator
1823 states, to salvage some of the effort later. */
1824 bidi_cache_iterator_state (bidi_it
, 0);
1826 /* Record the info about the previous character, so that
1827 it will be cached below with this state. */
1828 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1829 && bidi_it
->type
!= WEAK_BN
)
1830 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1831 type
= bidi_resolve_weak (bidi_it
);
1832 /* Paragraph separators have their levels fully resolved
1833 at this point, so cache them as resolved. */
1834 bidi_cache_iterator_state (bidi_it
, type
== NEUTRAL_B
);
1835 /* FIXME: implement L1 here, by testing for a newline and
1836 resetting the level for any sequence of whitespace
1837 characters adjacent to it. */
1838 } while (!(type
== NEUTRAL_B
1840 && bidi_get_category (type
) != NEUTRAL
)
1841 /* This is all per level run, so stop when we
1842 reach the end of this level run. */
1843 || bidi_it
->level_stack
[bidi_it
->stack_idx
].level
!=
1846 bidi_remember_char (&saved_it
.next_for_neutral
, bidi_it
);
1857 /* N1: ``European and Arabic numbers are treated as
1858 though they were R.'' */
1859 next_type
= STRONG_R
;
1860 saved_it
.next_for_neutral
.type
= STRONG_R
;
1863 if (!bidi_explicit_dir_char (bidi_it
->ch
))
1864 abort (); /* can't happen: BNs are skipped */
1867 /* Marched all the way to the end of this level run.
1868 We need to use the eor type, whose information is
1869 stored by bidi_set_sor_type in the prev_for_neutral
1871 if (saved_it
.type
!= WEAK_BN
1872 || bidi_get_category (bidi_it
->prev
.type_after_w1
) == NEUTRAL
)
1874 next_type
= bidi_it
->prev_for_neutral
.type
;
1875 saved_it
.next_for_neutral
.type
= next_type
;
1876 bidi_check_type (next_type
);
1880 /* This is a BN which does not adjoin neutrals.
1881 Leave its type alone. */
1882 bidi_copy_it (bidi_it
, &saved_it
);
1883 return bidi_it
->type
;
1889 type
= bidi_resolve_neutral_1 (saved_it
.prev_for_neutral
.type
,
1890 next_type
, current_level
);
1891 saved_it
.type
= type
;
1892 bidi_check_type (type
);
1893 bidi_copy_it (bidi_it
, &saved_it
);
/* Summary of bidi_type_of_next_char: BIDI_IT->ignore_bn_limit is reset
   to -1 once the iterator position has reached a recorded limit, or,
   when it holds the -2 marker, once the current character is no
   longer an explicit directional code; the type itself is then
   obtained from bidi_resolve_neutral.  */
1899 /* Given an iterator state in BIDI_IT, advance one character position
1900 in the buffer/string to the next character (in the logical order),
1901 resolve the bidi type of that next character, and return that
1904 bidi_type_of_next_char (struct bidi_it
*bidi_it
)
1908 /* This should always be called during a forward scan. */
1909 if (bidi_it
->scan_dir
!= 1)
1912 /* Reset the limit until which to ignore BNs if we step out of the
1913 area where we found only empty levels. */
1914 if ((bidi_it
->ignore_bn_limit
> -1
1915 && bidi_it
->ignore_bn_limit
<= bidi_it
->charpos
)
1916 || (bidi_it
->ignore_bn_limit
== -2
1917 && !bidi_explicit_dir_char (bidi_it
->ch
)))
1918 bidi_it
->ignore_bn_limit
= -1;
1920 type
= bidi_resolve_neutral (bidi_it
);
1925 /* Given an iterator state BIDI_IT, advance one character position in
1926 the buffer/string to the next character (in the current scan
1927 direction), resolve the embedding and implicit levels of that next
1928 character, and return the resulting level. */
/* The computed level is also stored in BIDI_IT->resolved_level.  If
   the wanted state is found in the cache with a resolved_level other
   than -1, that cached level is returned directly.  */
1930 bidi_level_of_next_char (struct bidi_it
*bidi_it
)
1933 int level
, prev_level
= -1;
1934 struct bidi_saved_info next_for_neutral
;
1935 EMACS_INT next_char_pos
= -2;
1937 if (bidi_it
->scan_dir
== 1)
1940 (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
1941 ? bidi_it
->string
.schars
: ZV
;
1943 /* There's no sense in trying to advance if we hit end of text. */
1944 if (bidi_it
->charpos
>= eob
)
1945 return bidi_it
->resolved_level
;
1947 /* Record the info about the previous character. */
1948 if (bidi_it
->type_after_w1
!= WEAK_BN
/* W1/Retaining */
1949 && bidi_it
->type
!= WEAK_BN
)
1950 bidi_remember_char (&bidi_it
->prev
, bidi_it
);
1951 if (bidi_it
->type_after_w1
== STRONG_R
1952 || bidi_it
->type_after_w1
== STRONG_L
1953 || bidi_it
->type_after_w1
== STRONG_AL
)
1954 bidi_remember_char (&bidi_it
->last_strong
, bidi_it
);
1955 /* FIXME: it sounds like we don't need both prev and
1956 prev_for_neutral members, but I'm leaving them both for now. */
1957 if (bidi_it
->type
== STRONG_R
|| bidi_it
->type
== STRONG_L
1958 || bidi_it
->type
== WEAK_EN
|| bidi_it
->type
== WEAK_AN
)
1959 bidi_remember_char (&bidi_it
->prev_for_neutral
, bidi_it
);
1961 /* If we overstepped the characters used for resolving neutrals
1962 and whitespace, invalidate their info in the iterator. */
1963 if (bidi_it
->charpos
>= bidi_it
->next_for_neutral
.charpos
)
1964 bidi_it
->next_for_neutral
.type
= UNKNOWN_BT
;
1965 if (bidi_it
->next_en_pos
>= 0
1966 && bidi_it
->charpos
>= bidi_it
->next_en_pos
)
1967 bidi_it
->next_en_pos
= -1;
1968 if (bidi_it
->next_for_ws
.type
!= UNKNOWN_BT
1969 && bidi_it
->charpos
>= bidi_it
->next_for_ws
.charpos
)
1970 bidi_it
->next_for_ws
.type
= UNKNOWN_BT
;
1972 /* This must be taken before we fill the iterator with the info
1973 about the next char. If we scan backwards, the iterator
1974 state must be already cached, so there's no need to know the
1975 embedding level of the previous character, since we will be
1976 returning to our caller shortly. */
1977 prev_level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
1979 next_for_neutral
= bidi_it
->next_for_neutral
;
1981 /* Perhaps the character we want is already cached. If it is, the
1982 call to bidi_cache_find below will return a type other than
1984 if (bidi_cache_idx
> bidi_cache_start
&& !bidi_it
->first_elt
)
1987 (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
)) ? 0 : 1;
1989 if (bidi_it
->scan_dir
> 0)
1991 if (bidi_it
->nchars
<= 0)
1993 next_char_pos
= bidi_it
->charpos
+ bidi_it
->nchars
;
1995 else if (bidi_it
->charpos
>= bob
)
1996 /* Implementation note: we allow next_char_pos to be as low as
1997 0 for buffers or -1 for strings, and that is okay because
1998 that's the "position" of the sentinel iterator state we
1999 cached at the beginning of the iteration. */
2000 next_char_pos
= bidi_it
->charpos
- 1;
2001 if (next_char_pos
>= bob
- 1)
2002 type
= bidi_cache_find (next_char_pos
, -1, bidi_it
);
2008 if (type
!= UNKNOWN_BT
)
2010 /* Don't lose the information for resolving neutrals! The
2011 cached states could have been cached before their
2012 next_for_neutral member was computed. If we are on our way
2013 forward, we can simply take the info from the previous
2015 if (bidi_it
->scan_dir
== 1
2016 && bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
2017 bidi_it
->next_for_neutral
= next_for_neutral
;
2019 /* If resolved_level is -1, it means this state was cached
2020 before it was completely resolved, so we cannot return
2022 if (bidi_it
->resolved_level
!= -1)
2023 return bidi_it
->resolved_level
;
2025 if (bidi_it
->scan_dir
== -1)
2026 /* If we are going backwards, the iterator state is already cached
2027 from previous scans, and should be fully resolved. */
2030 if (type
== UNKNOWN_BT
)
2031 type
= bidi_type_of_next_char (bidi_it
);
2033 if (type
== NEUTRAL_B
)
2034 return bidi_it
->resolved_level
;
2036 level
= bidi_it
->level_stack
[bidi_it
->stack_idx
].level
;
2037 if ((bidi_get_category (type
) == NEUTRAL
/* && type != NEUTRAL_B */)
2038 || (type
== WEAK_BN
&& prev_level
== level
))
2040 if (bidi_it
->next_for_neutral
.type
== UNKNOWN_BT
)
2043 /* If the cached state shows a neutral character, it was not
2044 resolved by bidi_resolve_neutral, so do it now. */
2045 type
= bidi_resolve_neutral_1 (bidi_it
->prev_for_neutral
.type
,
2046 bidi_it
->next_for_neutral
.type
,
2050 if (!(type
== STRONG_R
2054 || type
== WEAK_AN
))
2056 bidi_it
->type
= type
;
2057 bidi_check_type (bidi_it
->type
);
2059 /* For L1 below, we need to know, for each WS character, whether
2060 it belongs to a sequence of WS characters preceding a newline
2061 or a TAB or a paragraph separator. */
2062 if (bidi_it
->orig_type
== NEUTRAL_WS
2063 && bidi_it
->next_for_ws
.type
== UNKNOWN_BT
)
2066 EMACS_INT clen
= bidi_it
->ch_len
;
2067 EMACS_INT bpos
= bidi_it
->bytepos
;
2068 EMACS_INT cpos
= bidi_it
->charpos
;
2069 EMACS_INT disp_pos
= bidi_it
->disp_pos
;
2070 EMACS_INT nc
= bidi_it
->nchars
;
2071 struct bidi_string_data bs
= bidi_it
->string
;
2073 int fwp
= bidi_it
->frame_window_p
;
2074 int dpp
= bidi_it
->disp_prop_p
;
2076 if (bidi_it
->nchars
<= 0)
2079 ch
= bidi_fetch_char (bpos
+= clen
, cpos
+= nc
, &disp_pos
, &dpp
, &bs
,
2081 if (ch
== '\n' || ch
== BIDI_EOB
/* || ch == LINESEP_CHAR */)
2084 chtype
= bidi_get_type (ch
, NEUTRAL_DIR
);
2085 } while (chtype
== NEUTRAL_WS
|| chtype
== WEAK_BN
2086 || bidi_explicit_dir_char (ch
)); /* L1/Retaining */
2087 bidi_it
->next_for_ws
.type
= chtype
;
2088 bidi_check_type (bidi_it
->next_for_ws
.type
);
2089 bidi_it
->next_for_ws
.charpos
= cpos
;
2090 bidi_it
->next_for_ws
.bytepos
= bpos
;
2093 /* Resolve implicit levels, with a twist: PDFs get the embedding
2094 level of the embedding they terminate. See below for the
2096 if (bidi_it
->orig_type
== PDF
2097 /* Don't do this if this formatting code didn't change the
2098 embedding level due to invalid or empty embeddings. */
2099 && prev_level
!= level
)
2101 /* Don't look in UAX#9 for the reason for this: it's our own
2102 private quirk. The reason is that we want the formatting
2103 codes to be delivered so that they bracket the text of their
2104 embedding. For example, given the text
2108 we want it to be displayed as
2116 which will result because we bump up the embedding level as
2117 soon as we see the RLO and pop it as soon as we see the PDF,
2118 so RLO itself has the same embedding level as "teST", and
2119 thus would be normally delivered last, just before the PDF.
2120 The switch below fiddles with the level of PDF so that this
2121 ugly side effect does not happen.
2123 (This is, of course, only important if the formatting codes
2124 are actually displayed, but Emacs does need to display them
2125 if the user wants to.) */
2128 else if (bidi_it
->orig_type
== NEUTRAL_B
/* L1 */
2129 || bidi_it
->orig_type
== NEUTRAL_S
2130 || bidi_it
->ch
== '\n' || bidi_it
->ch
== BIDI_EOB
2131 /* || bidi_it->ch == LINESEP_CHAR */
2132 || (bidi_it
->orig_type
== NEUTRAL_WS
2133 && (bidi_it
->next_for_ws
.type
== NEUTRAL_B
2134 || bidi_it
->next_for_ws
.type
== NEUTRAL_S
)))
2135 level
= bidi_it
->level_stack
[0].level
;
2136 else if ((level
& 1) == 0) /* I1 */
2138 if (type
== STRONG_R
)
2140 else if (type
== WEAK_EN
|| type
== WEAK_AN
)
2145 if (type
== STRONG_L
|| type
== WEAK_EN
|| type
== WEAK_AN
)
2149 bidi_it
->resolved_level
= level
;
/* Implementation outline for bidi_find_other_level_edge: the cache is
   consulted first through bidi_cache_find_level_change, and a hit is
   loaded into BIDI_IT with bidi_cache_fetch_state.  Otherwise the
   text is walked with bidi_level_of_next_char, caching every iterator
   state as resolved, for as long as the returned level stays at or
   above LEVEL.  */
2153 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
2154 non-zero, we are at the end of a level, and we need to prepare to
2155 resume the scan of the lower level.
2157 If this level's other edge is cached, we simply jump to it, filling
2158 the iterator structure with the iterator state on the other edge.
2159 Otherwise, we walk the buffer or string until we come back to the
2160 same level as LEVEL.
2162 Note: we are not talking here about a ``level run'' in the UAX#9
2163 sense of the term, but rather about a ``level'' which includes
2164 all the levels higher than it. In other words, given the levels
2167 11111112222222333333334443343222222111111112223322111
2170 and assuming we are at point A scanning left to right, this
2171 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2174 bidi_find_other_level_edge (struct bidi_it
*bidi_it
, int level
, int end_flag
)
2176 int dir
= end_flag
? -bidi_it
->scan_dir
: bidi_it
->scan_dir
;
2179 /* Try the cache first. */
2180 if ((idx
= bidi_cache_find_level_change (level
, dir
, end_flag
))
2181 >= bidi_cache_start
)
2182 bidi_cache_fetch_state (idx
, bidi_it
);
2188 abort (); /* if we are at end of level, its edges must be cached */
2190 bidi_cache_iterator_state (bidi_it
, 1);
2192 new_level
= bidi_level_of_next_char (bidi_it
);
2193 bidi_cache_iterator_state (bidi_it
, 1);
2194 } while (new_level
>= level
);
2199 bidi_move_to_visually_next (struct bidi_it
*bidi_it
)
2201 int old_level
, new_level
, next_level
;
2202 struct bidi_it sentinel
;
2203 struct gcpro gcpro1
;
2205 if (bidi_it
->charpos
< 0 || bidi_it
->bytepos
< 0)
2208 if (bidi_it
->scan_dir
== 0)
2210 bidi_it
->scan_dir
= 1; /* default to logical order */
2213 /* The code below can call eval, and thus cause GC. If we are
2214 iterating a Lisp string, make sure it won't be GCed. */
2215 if (STRINGP (bidi_it
->string
.lstring
))
2216 GCPRO1 (bidi_it
->string
.lstring
);
2218 /* If we just passed a newline, initialize for the next line. */
2219 if (!bidi_it
->first_elt
&& bidi_it
->orig_type
== NEUTRAL_B
)
2220 bidi_line_init (bidi_it
);
2222 /* Prepare the sentinel iterator state, and cache it. When we bump
2223 into it, scanning backwards, we'll know that the last non-base
2224 level is exhausted. */
2225 if (bidi_cache_idx
== bidi_cache_start
)
2227 bidi_copy_it (&sentinel
, bidi_it
);
2228 if (bidi_it
->first_elt
)
2230 sentinel
.charpos
--; /* cached charpos needs to be monotonic */
2232 sentinel
.ch
= '\n'; /* doesn't matter, but why not? */
2233 sentinel
.ch_len
= 1;
2234 sentinel
.nchars
= 1;
2236 bidi_cache_iterator_state (&sentinel
, 1);
2239 old_level
= bidi_it
->resolved_level
;
2240 new_level
= bidi_level_of_next_char (bidi_it
);
2242 /* Reordering of resolved levels (clause L2) is implemented by
2243 jumping to the other edge of the level and flipping direction of
2244 scanning the text whenever we find a level change. */
2245 if (new_level
!= old_level
)
2247 int ascending
= new_level
> old_level
;
2248 int level_to_search
= ascending
? old_level
+ 1 : old_level
;
2249 int incr
= ascending
? 1 : -1;
2250 int expected_next_level
= old_level
+ incr
;
2252 /* Jump (or walk) to the other edge of this level. */
2253 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2254 /* Switch scan direction and peek at the next character in the
2256 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2258 /* The following loop handles the case where the resolved level
2259 jumps by more than one. This is typical for numbers inside a
2260 run of text with left-to-right embedding direction, but can
2261 also happen in other situations. In those cases the decision
2262 where to continue after a level change, and in what direction,
2263 is tricky. For example, given a text like below:
2268 (where the numbers below the text show the resolved levels),
2269 the result of reordering according to UAX#9 should be this:
2273 This is implemented by the loop below which flips direction
2274 and jumps to the other edge of the level each time it finds
2275 the new level not to be the expected one. The expected level
2276 is always one more or one less than the previous one. */
2277 next_level
= bidi_peek_at_next_level (bidi_it
);
2278 while (next_level
!= expected_next_level
)
2280 expected_next_level
+= incr
;
2281 level_to_search
+= incr
;
2282 bidi_find_other_level_edge (bidi_it
, level_to_search
, !ascending
);
2283 bidi_it
->scan_dir
= -bidi_it
->scan_dir
;
2284 next_level
= bidi_peek_at_next_level (bidi_it
);
2287 /* Finally, deliver the next character in the new direction. */
2288 next_level
= bidi_level_of_next_char (bidi_it
);
2291 /* Take note when we have just processed the newline that precedes
2292 the end of the paragraph. The next time we are about to be
2293 called, set_iterator_to_next will automatically reinit the
2294 paragraph direction, if needed. We do this at the newline before
2295 the paragraph separator, because the next character might not be
2296 the first character of the next paragraph, due to the bidi
2297 reordering, whereas we _must_ know the paragraph base direction
2298 _before_ we process the paragraph's text, since the base
2299 direction affects the reordering. */
2300 if (bidi_it
->scan_dir
== 1 && bidi_it
->orig_type
== NEUTRAL_B
)
2302 /* The paragraph direction of the entire string, once
2303 determined, is in effect for the entire string. Setting the
2304 separator limit to the end of the string prevents
2305 bidi_paragraph_init from being called automatically on this
2307 if (bidi_it
->string
.s
|| STRINGP (bidi_it
->string
.lstring
))
2308 bidi_it
->separator_limit
= bidi_it
->string
.schars
;
2309 else if (bidi_it
->bytepos
< ZV_BYTE
)
2312 bidi_at_paragraph_end (bidi_it
->charpos
+ bidi_it
->nchars
,
2313 bidi_it
->bytepos
+ bidi_it
->ch_len
);
2314 if (bidi_it
->nchars
<= 0)
2318 bidi_it
->new_paragraph
= 1;
2319 /* Record the buffer position of the last character of the
2320 paragraph separator. */
2321 bidi_it
->separator_limit
=
2322 bidi_it
->charpos
+ bidi_it
->nchars
+ sep_len
;
2327 if (bidi_it
->scan_dir
== 1 && bidi_cache_idx
> bidi_cache_start
)
2329 /* If we are at paragraph's base embedding level and beyond the
2330 last cached position, the cache's job is done and we can
2332 if (bidi_it
->resolved_level
== bidi_it
->level_stack
[0].level
2333 && bidi_it
->charpos
> (bidi_cache
[bidi_cache_idx
- 1].charpos
2334 + bidi_cache
[bidi_cache_idx
- 1].nchars
- 1))
2335 bidi_cache_reset ();
2336 /* But as long as we are caching during forward scan, we must
2337 cache each state, or else the cache integrity will be
2338 compromised: it assumes cached states correspond to buffer
2341 bidi_cache_iterator_state (bidi_it
, 1);
2344 if (STRINGP (bidi_it
->string
.lstring
))
2348 /* This is meant to be called from within the debugger, whenever you
2349 wish to examine the cache contents. */
2350 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE
;
2352 bidi_dump_cached_states (void)
2357 if (bidi_cache_idx
== 0)
2359 fprintf (stderr
, "The cache is empty.\n");
2362 fprintf (stderr
, "Total of %"pD
"d state%s in cache:\n",
2363 bidi_cache_idx
, bidi_cache_idx
== 1 ? "" : "s");
2365 for (i
= bidi_cache
[bidi_cache_idx
- 1].charpos
; i
> 0; i
/= 10)
2367 fputs ("ch ", stderr
);
2368 for (i
= 0; i
< bidi_cache_idx
; i
++)
2369 fprintf (stderr
, "%*c", ndigits
, bidi_cache
[i
].ch
);
2370 fputs ("\n", stderr
);
2371 fputs ("lvl ", stderr
);
2372 for (i
= 0; i
< bidi_cache_idx
; i
++)
2373 fprintf (stderr
, "%*d", ndigits
, bidi_cache
[i
].resolved_level
);
2374 fputs ("\n", stderr
);
2375 fputs ("pos ", stderr
);
2376 for (i
= 0; i
< bidi_cache_idx
; i
++)
2377 fprintf (stderr
, "%*"pI
"d", ndigits
, bidi_cache
[i
].charpos
);
2378 fputs ("\n", stderr
);