Merge from trunk.
[bpt/emacs.git] / src / bidi.c
1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
3 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
19
20 /* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
23 as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the reference and most other implementations, this one is
26 designed to be called once for every character in the buffer or
27 string.
28
29 The main entry point is bidi_move_to_visually_next. Each time it
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
36 character by resolving their levels on the fly.
37
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
46
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
51
52 #include <config.h>
53 #include <stdio.h>
54 #include <setjmp.h>
55
56 #include "lisp.h"
57 #include "buffer.h"
58 #include "character.h"
59 #include "dispextern.h"
60
61 static int bidi_initialized = 0;
62
63 static Lisp_Object bidi_type_table, bidi_mirror_table;
64
65 #define LRM_CHAR 0x200E
66 #define RLM_CHAR 0x200F
67 #define BIDI_EOB -1
68
69 /* Local data structures. (Look in dispextern.h for the rest.) */
70
71 /* What we need to know about the current paragraph. */
72 struct bidi_paragraph_info {
73 EMACS_INT start_bytepos; /* byte position where it begins */
74 EMACS_INT end_bytepos; /* byte position where it ends */
75 int embedding_level; /* its basic embedding level */
76 bidi_dir_t base_dir; /* its base direction */
77 };
78
79 /* Data type for describing the bidirectional character categories. */
80 typedef enum {
81 UNKNOWN_BC,
82 NEUTRAL,
83 WEAK,
84 STRONG
85 } bidi_category_t;
86
87 extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
88 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
89
90 static Lisp_Object paragraph_start_re, paragraph_separate_re;
91 static Lisp_Object Qparagraph_start, Qparagraph_separate;
92
93 static void
94 bidi_initialize (void)
95 {
96
97 #include "biditype.h"
98 #include "bidimirror.h"
99
100 int i;
101
102 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
103 staticpro (&bidi_type_table);
104
105 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
106 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
107 make_number (bidi_type[i].type));
108
109 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
110 staticpro (&bidi_mirror_table);
111
112 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
113 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
114 make_number (bidi_mirror[i].to));
115
116 Qparagraph_start = intern ("paragraph-start");
117 staticpro (&Qparagraph_start);
118 paragraph_start_re = Fsymbol_value (Qparagraph_start);
119 if (!STRINGP (paragraph_start_re))
120 paragraph_start_re = build_string ("\f\\|[ \t]*$");
121 staticpro (&paragraph_start_re);
122 Qparagraph_separate = intern ("paragraph-separate");
123 staticpro (&Qparagraph_separate);
124 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
125 if (!STRINGP (paragraph_separate_re))
126 paragraph_separate_re = build_string ("[ \t\f]*$");
127 staticpro (&paragraph_separate_re);
128 bidi_initialized = 1;
129 }
130
131 /* Return the bidi type of a character CH, subject to the current
132 directional OVERRIDE. */
133 static inline bidi_type_t
134 bidi_get_type (int ch, bidi_dir_t override)
135 {
136 bidi_type_t default_type;
137
138 if (ch == BIDI_EOB)
139 return NEUTRAL_B;
140 if (ch < 0 || ch > MAX_CHAR)
141 abort ();
142
143 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
144
145 if (override == NEUTRAL_DIR)
146 return default_type;
147
148 switch (default_type)
149 {
150 /* Although UAX#9 does not tell, it doesn't make sense to
151 override NEUTRAL_B and LRM/RLM characters. */
152 case NEUTRAL_B:
153 case LRE:
154 case LRO:
155 case RLE:
156 case RLO:
157 case PDF:
158 return default_type;
159 default:
160 switch (ch)
161 {
162 case LRM_CHAR:
163 case RLM_CHAR:
164 return default_type;
165 default:
166 if (override == L2R) /* X6 */
167 return STRONG_L;
168 else if (override == R2L)
169 return STRONG_R;
170 else
171 abort (); /* can't happen: handled above */
172 }
173 }
174 }
175
176 static void
177 bidi_check_type (bidi_type_t type)
178 {
179 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
180 abort ();
181 }
182
183 /* Given a bidi TYPE of a character, return its category. */
184 static inline bidi_category_t
185 bidi_get_category (bidi_type_t type)
186 {
187 switch (type)
188 {
189 case UNKNOWN_BT:
190 return UNKNOWN_BC;
191 case STRONG_L:
192 case STRONG_R:
193 case STRONG_AL:
194 case LRE:
195 case LRO:
196 case RLE:
197 case RLO:
198 return STRONG;
199 case PDF: /* ??? really?? */
200 case WEAK_EN:
201 case WEAK_ES:
202 case WEAK_ET:
203 case WEAK_AN:
204 case WEAK_CS:
205 case WEAK_NSM:
206 case WEAK_BN:
207 return WEAK;
208 case NEUTRAL_B:
209 case NEUTRAL_S:
210 case NEUTRAL_WS:
211 case NEUTRAL_ON:
212 return NEUTRAL;
213 default:
214 abort ();
215 }
216 }
217
218 /* Return the mirrored character of C, if it has one. If C has no
219 mirrored counterpart, return C.
220 Note: The conditions in UAX#9 clause L4 regarding the surrounding
221 context must be tested by the caller. */
222 int
223 bidi_mirror_char (int c)
224 {
225 Lisp_Object val;
226
227 if (c == BIDI_EOB)
228 return c;
229 if (c < 0 || c > MAX_CHAR)
230 abort ();
231
232 val = CHAR_TABLE_REF (bidi_mirror_table, c);
233 if (INTEGERP (val))
234 {
235 int v = XINT (val);
236
237 if (v < 0 || v > MAX_CHAR)
238 abort ();
239
240 return v;
241 }
242
243 return c;
244 }
245
246 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
247 copies the part of the level stack that is actually in use. */
248 static inline void
249 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
250 {
251 int i;
252
253 /* Copy everything except the level stack and beyond. */
254 memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
255
256 /* Copy the active part of the level stack. */
257 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
258 for (i = 1; i <= from->stack_idx; i++)
259 to->level_stack[i] = from->level_stack[i];
260 }
261
262 /* Caching the bidi iterator states. */
263
264 #define BIDI_CACHE_CHUNK 200
265 static struct bidi_it *bidi_cache;
266 static ptrdiff_t bidi_cache_size = 0;
267 enum { elsz = sizeof (struct bidi_it) };
268 static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
269 static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
270
271 static inline void
272 bidi_cache_reset (void)
273 {
274 bidi_cache_idx = 0;
275 bidi_cache_last_idx = -1;
276 }
277
278 static inline void
279 bidi_cache_shrink (void)
280 {
281 if (bidi_cache_size > BIDI_CACHE_CHUNK)
282 {
283 bidi_cache_size = BIDI_CACHE_CHUNK;
284 bidi_cache =
285 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
286 }
287 bidi_cache_reset ();
288 }
289
290 static inline void
291 bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
292 {
293 int current_scan_dir = bidi_it->scan_dir;
294
295 if (idx < 0 || idx >= bidi_cache_idx)
296 abort ();
297
298 bidi_copy_it (bidi_it, &bidi_cache[idx]);
299 bidi_it->scan_dir = current_scan_dir;
300 bidi_cache_last_idx = idx;
301 }
302
303 /* Find a cached state with a given CHARPOS and resolved embedding
304 level less or equal to LEVEL. if LEVEL is -1, disregard the
305 resolved levels in cached states. DIR, if non-zero, means search
306 in that direction from the last cache hit. */
307 static inline ptrdiff_t
308 bidi_cache_search (EMACS_INT charpos, int level, int dir)
309 {
310 ptrdiff_t i, i_start;
311
312 if (bidi_cache_idx)
313 {
314 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
315 {
316 dir = -1;
317 i_start = bidi_cache_last_idx - 1;
318 }
319 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
320 + bidi_cache[bidi_cache_last_idx].nchars - 1))
321 {
322 dir = 1;
323 i_start = bidi_cache_last_idx + 1;
324 }
325 else if (dir)
326 i_start = bidi_cache_last_idx;
327 else
328 {
329 dir = -1;
330 i_start = bidi_cache_idx - 1;
331 }
332
333 if (dir < 0)
334 {
335 /* Linear search for now; FIXME! */
336 for (i = i_start; i >= 0; i--)
337 if (bidi_cache[i].charpos <= charpos
338 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
339 && (level == -1 || bidi_cache[i].resolved_level <= level))
340 return i;
341 }
342 else
343 {
344 for (i = i_start; i < bidi_cache_idx; i++)
345 if (bidi_cache[i].charpos <= charpos
346 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
347 && (level == -1 || bidi_cache[i].resolved_level <= level))
348 return i;
349 }
350 }
351
352 return -1;
353 }
354
355 /* Find a cached state where the resolved level changes to a value
356 that is lower than LEVEL, and return its cache slot index. DIR is
357 the direction to search, starting with the last used cache slot.
358 BEFORE, if non-zero, means return the index of the slot that is
359 ``before'' the level change in the search direction. That is,
360 given the cached levels like this:
361
362 1122333442211
363 AB C
364
365 and assuming we are at the position cached at the slot marked with
366 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
367 index of slot B or A, depending whether BEFORE is, respectively,
368 non-zero or zero. */
369 static ptrdiff_t
370 bidi_cache_find_level_change (int level, int dir, int before)
371 {
372 if (bidi_cache_idx)
373 {
374 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
375 int incr = before ? 1 : 0;
376
377 if (!dir)
378 dir = -1;
379 else if (!incr)
380 i += dir;
381
382 if (dir < 0)
383 {
384 while (i >= incr)
385 {
386 if (bidi_cache[i - incr].resolved_level >= 0
387 && bidi_cache[i - incr].resolved_level < level)
388 return i;
389 i--;
390 }
391 }
392 else
393 {
394 while (i < bidi_cache_idx - incr)
395 {
396 if (bidi_cache[i + incr].resolved_level >= 0
397 && bidi_cache[i + incr].resolved_level < level)
398 return i;
399 i++;
400 }
401 }
402 }
403
404 return -1;
405 }
406
407 static inline void
408 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
409 {
410 ptrdiff_t idx;
411
412 /* We should never cache on backward scans. */
413 if (bidi_it->scan_dir == -1)
414 abort ();
415 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
416
417 if (idx < 0)
418 {
419 idx = bidi_cache_idx;
420 /* Enlarge the cache as needed. */
421 if (idx >= bidi_cache_size)
422 {
423 if (min (PTRDIFF_MAX, SIZE_MAX) / elsz - BIDI_CACHE_CHUNK
424 < bidi_cache_size)
425 memory_full (SIZE_MAX);
426 bidi_cache_size += BIDI_CACHE_CHUNK;
427 bidi_cache =
428 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
429 }
430 /* Character positions should correspond to cache positions 1:1.
431 If we are outside the range of cached positions, the cache is
432 useless and must be reset. */
433 if (idx > 0 &&
434 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
435 + bidi_cache[idx - 1].nchars)
436 || bidi_it->charpos < bidi_cache[0].charpos))
437 {
438 bidi_cache_reset ();
439 idx = 0;
440 }
441 if (bidi_it->nchars <= 0)
442 abort ();
443 bidi_copy_it (&bidi_cache[idx], bidi_it);
444 if (!resolved)
445 bidi_cache[idx].resolved_level = -1;
446 }
447 else
448 {
449 /* Copy only the members which could have changed, to avoid
450 costly copying of the entire struct. */
451 bidi_cache[idx].type = bidi_it->type;
452 bidi_check_type (bidi_it->type);
453 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
454 bidi_check_type (bidi_it->type_after_w1);
455 if (resolved)
456 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
457 else
458 bidi_cache[idx].resolved_level = -1;
459 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
460 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
461 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
462 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
463 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
464 }
465
466 bidi_cache_last_idx = idx;
467 if (idx >= bidi_cache_idx)
468 bidi_cache_idx = idx + 1;
469 }
470
471 static inline bidi_type_t
472 bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
473 {
474 ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
475
476 if (i >= 0)
477 {
478 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
479
480 bidi_copy_it (bidi_it, &bidi_cache[i]);
481 bidi_cache_last_idx = i;
482 /* Don't let scan direction from from the cached state override
483 the current scan direction. */
484 bidi_it->scan_dir = current_scan_dir;
485 return bidi_it->type;
486 }
487
488 return UNKNOWN_BT;
489 }
490
491 static inline int
492 bidi_peek_at_next_level (struct bidi_it *bidi_it)
493 {
494 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
495 abort ();
496 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
497 }
498
499 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
500 Value is the non-negative length of the paragraph separator
501 following the buffer position, -1 if position is at the beginning
502 of a new paragraph, or -2 if position is neither at beginning nor
503 at end of a paragraph. */
504 static EMACS_INT
505 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
506 {
507 Lisp_Object sep_re;
508 Lisp_Object start_re;
509 EMACS_INT val;
510
511 sep_re = paragraph_separate_re;
512 start_re = paragraph_start_re;
513
514 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
515 if (val < 0)
516 {
517 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
518 val = -1;
519 else
520 val = -2;
521 }
522
523 return val;
524 }
525
526 /* Determine the start-of-run (sor) directional type given the two
527 embedding levels on either side of the run boundary. Also, update
528 the saved info about previously seen characters, since that info is
529 generally valid for a single level run. */
530 static inline void
531 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
532 {
533 int higher_level = level_before > level_after ? level_before : level_after;
534
535 /* The prev_was_pdf gork is required for when we have several PDFs
536 in a row. In that case, we want to compute the sor type for the
537 next level run only once: when we see the first PDF. That's
538 because the sor type depends only on the higher of the two levels
539 that we find on the two sides of the level boundary (see UAX#9,
540 clause X10), and so we don't need to know the final embedding
541 level to which we descend after processing all the PDFs. */
542 if (!bidi_it->prev_was_pdf || level_before < level_after)
543 /* FIXME: should the default sor direction be user selectable? */
544 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
545 if (level_before > level_after)
546 bidi_it->prev_was_pdf = 1;
547
548 bidi_it->prev.type = UNKNOWN_BT;
549 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
550 bidi_it->last_strong.orig_type = UNKNOWN_BT;
551 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
552 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
553 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
554 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
555 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
556 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
557 }
558
559 /* Perform initializations for reordering a new line of bidi text. */
560 static void
561 bidi_line_init (struct bidi_it *bidi_it)
562 {
563 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
564 bidi_it->resolved_level = bidi_it->level_stack[0].level;
565 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
566 bidi_it->invalid_levels = 0;
567 bidi_it->invalid_rl_levels = -1;
568 bidi_it->next_en_pos = -1;
569 bidi_it->next_for_ws.type = UNKNOWN_BT;
570 bidi_set_sor_type (bidi_it,
571 bidi_it->paragraph_dir == R2L ? 1 : 0,
572 bidi_it->level_stack[0].level); /* X10 */
573
574 bidi_cache_reset ();
575 }
576
577 /* Fetch and return the character at BYTEPOS/CHARPOS. If that
578 character is covered by a display string, treat the entire run of
579 covered characters as a single character u+FFFC, and return their
580 combined length in CH_LEN and NCHARS. DISP_POS specifies the
581 character position of the next display string, or -1 if not yet
582 computed. When the next character is at or beyond that position,
583 the function updates DISP_POS with the position of the next display
584 string. */
585 static inline int
586 bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
587 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
588 {
589 int ch;
590
591 /* FIXME: Support strings in addition to buffers. */
592 /* If we got past the last known position of display string, compute
593 the position of the next one. That position could be at BYTEPOS. */
594 if (charpos < ZV && charpos > *disp_pos)
595 *disp_pos = compute_display_string_pos (charpos, frame_window_p);
596
597 /* Fetch the character at BYTEPOS. */
598 if (bytepos >= ZV_BYTE)
599 {
600 ch = BIDI_EOB;
601 *ch_len = 1;
602 *nchars = 1;
603 *disp_pos = ZV;
604 }
605 else if (charpos >= *disp_pos)
606 {
607 EMACS_INT disp_end_pos;
608
609 /* We don't expect to find ourselves in the middle of a display
610 property. Hopefully, it will never be needed. */
611 if (charpos > *disp_pos)
612 abort ();
613 /* Return the Unicode Object Replacement Character to represent
614 the entire run of characters covered by the display
615 string. */
616 ch = 0xFFFC;
617 disp_end_pos = compute_display_string_end (*disp_pos);
618 *nchars = disp_end_pos - *disp_pos;
619 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
620 }
621 else
622 {
623 ch = FETCH_MULTIBYTE_CHAR (bytepos);
624 *nchars = 1;
625 *ch_len = CHAR_BYTES (ch);
626 }
627
628 /* If we just entered a run of characters covered by a display
629 string, compute the position of the next display string. */
630 if (charpos + *nchars <= ZV && charpos + *nchars > *disp_pos)
631 *disp_pos = compute_display_string_pos (charpos + *nchars, frame_window_p);
632
633 return ch;
634 }
635
636 /* Find the beginning of this paragraph by looking back in the buffer.
637 Value is the byte position of the paragraph's beginning. */
638 static EMACS_INT
639 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
640 {
641 Lisp_Object re = paragraph_start_re;
642 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
643
644 while (pos_byte > BEGV_BYTE
645 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
646 {
647 /* FIXME: What if the paragraph beginning is covered by a
648 display string? And what if a display string covering some
649 of the text over which we scan back includes
650 paragraph_start_re? */
651 pos = find_next_newline_no_quit (pos - 1, -1);
652 pos_byte = CHAR_TO_BYTE (pos);
653 }
654 return pos_byte;
655 }
656
657 /* Determine the base direction, a.k.a. base embedding level, of the
658 paragraph we are about to iterate through. If DIR is either L2R or
659 R2L, just use that. Otherwise, determine the paragraph direction
660 from the first strong directional character of the paragraph.
661
662 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
663 has no strong directional characters and both DIR and
664 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
665 in the buffer until a paragraph is found with a strong character,
666 or until hitting BEGV. In the latter case, fall back to L2R. This
667 flag is used in current-bidi-paragraph-direction.
668
669 Note that this function gives the paragraph separator the same
670 direction as the preceding paragraph, even though Emacs generally
671 views the separartor as not belonging to any paragraph. */
672 void
673 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
674 {
675 EMACS_INT bytepos = bidi_it->bytepos;
676 EMACS_INT pstartbyte;
677
678 /* Special case for an empty buffer. */
679 if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
680 dir = L2R;
681 /* We should never be called at EOB or before BEGV. */
682 else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
683 abort ();
684
685 if (dir == L2R)
686 {
687 bidi_it->paragraph_dir = L2R;
688 bidi_it->new_paragraph = 0;
689 }
690 else if (dir == R2L)
691 {
692 bidi_it->paragraph_dir = R2L;
693 bidi_it->new_paragraph = 0;
694 }
695 else if (dir == NEUTRAL_DIR) /* P2 */
696 {
697 int ch;
698 EMACS_INT ch_len, nchars;
699 EMACS_INT pos, disp_pos = -1;
700 bidi_type_t type;
701
702 if (!bidi_initialized)
703 bidi_initialize ();
704
705 /* If we are inside a paragraph separator, we are just waiting
706 for the separator to be exhausted; use the previous paragraph
707 direction. But don't do that if we have been just reseated,
708 because we need to reinitialize below in that case. */
709 if (!bidi_it->first_elt
710 && bidi_it->charpos < bidi_it->separator_limit)
711 return;
712
713 /* If we are on a newline, get past it to where the next
714 paragraph might start. But don't do that at BEGV since then
715 we are potentially in a new paragraph that doesn't yet
716 exist. */
717 pos = bidi_it->charpos;
718 if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
719 {
720 bytepos++;
721 pos++;
722 }
723
724 /* We are either at the beginning of a paragraph or in the
725 middle of it. Find where this paragraph starts. */
726 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
727 bidi_it->separator_limit = -1;
728 bidi_it->new_paragraph = 0;
729
730 /* The following loop is run more than once only if NO_DEFAULT_P
731 is non-zero. */
732 do {
733 bytepos = pstartbyte;
734 pos = BYTE_TO_CHAR (bytepos);
735 ch = bidi_fetch_char (bytepos, pos, &disp_pos, bidi_it->frame_window_p,
736 &ch_len, &nchars);
737 type = bidi_get_type (ch, NEUTRAL_DIR);
738
739 for (pos += nchars, bytepos += ch_len;
740 /* NOTE: UAX#9 says to search only for L, AL, or R types
741 of characters, and ignore RLE, RLO, LRE, and LRO.
742 However, I'm not sure it makes sense to omit those 4;
743 should try with and without that to see the effect. */
744 (bidi_get_category (type) != STRONG)
745 || (bidi_ignore_explicit_marks_for_paragraph_level
746 && (type == RLE || type == RLO
747 || type == LRE || type == LRO));
748 type = bidi_get_type (ch, NEUTRAL_DIR))
749 {
750 if (bytepos >= ZV_BYTE)
751 {
752 /* Pretend there's a paragraph separator at end of
753 buffer. */
754 type = NEUTRAL_B;
755 break;
756 }
757 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
758 break;
759 /* Fetch next character and advance to get past it. */
760 ch = bidi_fetch_char (bytepos, pos, &disp_pos,
761 bidi_it->frame_window_p, &ch_len, &nchars);
762 pos += nchars;
763 bytepos += ch_len;
764 }
765 if (type == STRONG_R || type == STRONG_AL) /* P3 */
766 bidi_it->paragraph_dir = R2L;
767 else if (type == STRONG_L)
768 bidi_it->paragraph_dir = L2R;
769 if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
770 {
771 /* If this paragraph is at BEGV, default to L2R. */
772 if (pstartbyte == BEGV_BYTE)
773 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
774 else
775 {
776 EMACS_INT prevpbyte = pstartbyte;
777 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
778
779 /* Find the beginning of the previous paragraph, if any. */
780 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
781 {
782 /* FXIME: What if p is covered by a display
783 string? See also a FIXME inside
784 bidi_find_paragraph_start. */
785 p--;
786 pbyte = CHAR_TO_BYTE (p);
787 prevpbyte = bidi_find_paragraph_start (p, pbyte);
788 }
789 pstartbyte = prevpbyte;
790 }
791 }
792 } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
793 }
794 else
795 abort ();
796
797 /* Contrary to UAX#9 clause P3, we only default the paragraph
798 direction to L2R if we have no previous usable paragraph
799 direction. This is allowed by the HL1 clause. */
800 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
801 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
802 if (bidi_it->paragraph_dir == R2L)
803 bidi_it->level_stack[0].level = 1;
804 else
805 bidi_it->level_stack[0].level = 0;
806
807 bidi_line_init (bidi_it);
808 }
809
810 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
811 end. */
812 static inline void
813 bidi_set_paragraph_end (struct bidi_it *bidi_it)
814 {
815 bidi_it->invalid_levels = 0;
816 bidi_it->invalid_rl_levels = -1;
817 bidi_it->stack_idx = 0;
818 bidi_it->resolved_level = bidi_it->level_stack[0].level;
819 }
820
821 /* Initialize the bidi iterator from buffer/string position CHARPOS. */
822 void
823 bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
824 struct bidi_it *bidi_it)
825 {
826 if (! bidi_initialized)
827 bidi_initialize ();
828 bidi_it->charpos = charpos;
829 bidi_it->bytepos = bytepos;
830 bidi_it->frame_window_p = frame_window_p;
831 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
832 bidi_it->first_elt = 1;
833 bidi_set_paragraph_end (bidi_it);
834 bidi_it->new_paragraph = 1;
835 bidi_it->separator_limit = -1;
836 bidi_it->type = NEUTRAL_B;
837 bidi_it->type_after_w1 = NEUTRAL_B;
838 bidi_it->orig_type = NEUTRAL_B;
839 bidi_it->prev_was_pdf = 0;
840 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
841 bidi_it->prev.orig_type = UNKNOWN_BT;
842 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
843 bidi_it->last_strong.orig_type = UNKNOWN_BT;
844 bidi_it->next_for_neutral.charpos = -1;
845 bidi_it->next_for_neutral.type =
846 bidi_it->next_for_neutral.type_after_w1 =
847 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
848 bidi_it->prev_for_neutral.charpos = -1;
849 bidi_it->prev_for_neutral.type =
850 bidi_it->prev_for_neutral.type_after_w1 =
851 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
852 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
853 bidi_it->disp_pos = -1; /* invalid/unknown */
854 bidi_cache_shrink ();
855 }
856
857 /* Push the current embedding level and override status; reset the
858 current level to LEVEL and the current override status to OVERRIDE. */
859 static inline void
860 bidi_push_embedding_level (struct bidi_it *bidi_it,
861 int level, bidi_dir_t override)
862 {
863 bidi_it->stack_idx++;
864 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
865 abort ();
866 bidi_it->level_stack[bidi_it->stack_idx].level = level;
867 bidi_it->level_stack[bidi_it->stack_idx].override = override;
868 }
869
870 /* Pop the embedding level and directional override status from the
871 stack, and return the new level. */
872 static inline int
873 bidi_pop_embedding_level (struct bidi_it *bidi_it)
874 {
875 /* UAX#9 says to ignore invalid PDFs. */
876 if (bidi_it->stack_idx > 0)
877 bidi_it->stack_idx--;
878 return bidi_it->level_stack[bidi_it->stack_idx].level;
879 }
880
881 /* Record in SAVED_INFO the information about the current character. */
882 static inline void
883 bidi_remember_char (struct bidi_saved_info *saved_info,
884 struct bidi_it *bidi_it)
885 {
886 saved_info->charpos = bidi_it->charpos;
887 saved_info->bytepos = bidi_it->bytepos;
888 saved_info->type = bidi_it->type;
889 bidi_check_type (bidi_it->type);
890 saved_info->type_after_w1 = bidi_it->type_after_w1;
891 bidi_check_type (bidi_it->type_after_w1);
892 saved_info->orig_type = bidi_it->orig_type;
893 bidi_check_type (bidi_it->orig_type);
894 }
895
896 /* Resolve the type of a neutral character according to the type of
897 surrounding strong text and the current embedding level. */
898 static inline bidi_type_t
899 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
900 {
901 /* N1: European and Arabic numbers are treated as though they were R. */
902 if (next_type == WEAK_EN || next_type == WEAK_AN)
903 next_type = STRONG_R;
904 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
905 prev_type = STRONG_R;
906
907 if (next_type == prev_type) /* N1 */
908 return next_type;
909 else if ((lev & 1) == 0) /* N2 */
910 return STRONG_L;
911 else
912 return STRONG_R;
913 }
914
915 static inline int
916 bidi_explicit_dir_char (int ch)
917 {
918 bidi_type_t ch_type;
919
920 if (!bidi_initialized)
921 abort ();
922 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
923 return (ch_type == LRE || ch_type == LRO
924 || ch_type == RLE || ch_type == RLO
925 || ch_type == PDF);
926 }
927
928 /* A helper function for bidi_resolve_explicit. It advances to the
929 next character in logical order and determines the new embedding
930 level and directional override, but does not take into account
931 empty embeddings. */
932 static int
933 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
934 {
935 int curchar;
936 bidi_type_t type;
937 int current_level;
938 int new_level;
939 bidi_dir_t override;
940
941 /* If reseat()'ed, don't advance, so as to start iteration from the
942 position where we were reseated. bidi_it->bytepos can be less
943 than BEGV_BYTE after reseat to BEGV. */
944 if (bidi_it->bytepos < BEGV_BYTE
945 || bidi_it->first_elt)
946 {
947 bidi_it->first_elt = 0;
948 if (bidi_it->charpos < BEGV)
949 bidi_it->charpos = BEGV;
950 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
951 }
952 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
953 {
954 /* Advance to the next character, skipping characters covered by
955 display strings (nchars > 1). */
956 if (bidi_it->nchars <= 0)
957 abort ();
958 bidi_it->charpos += bidi_it->nchars;
959 if (bidi_it->ch_len == 0)
960 abort ();
961 bidi_it->bytepos += bidi_it->ch_len;
962 }
963
964 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
965 override = bidi_it->level_stack[bidi_it->stack_idx].override;
966 new_level = current_level;
967
968 if (bidi_it->bytepos >= ZV_BYTE)
969 {
970 curchar = BIDI_EOB;
971 bidi_it->ch_len = 1;
972 bidi_it->nchars = 1;
973 bidi_it->disp_pos = ZV;
974 }
975 else
976 {
977 /* Fetch the character at BYTEPOS. If it is covered by a
978 display string, treat the entire run of covered characters as
979 a single character u+FFFC. */
980 curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
981 &bidi_it->disp_pos, bidi_it->frame_window_p,
982 &bidi_it->ch_len, &bidi_it->nchars);
983 }
984 bidi_it->ch = curchar;
985
986 /* Don't apply directional override here, as all the types we handle
987 below will not be affected by the override anyway, and we need
988 the original type unaltered. The override will be applied in
989 bidi_resolve_weak. */
990 type = bidi_get_type (curchar, NEUTRAL_DIR);
991 bidi_it->orig_type = type;
992 bidi_check_type (bidi_it->orig_type);
993
994 if (type != PDF)
995 bidi_it->prev_was_pdf = 0;
996
997 bidi_it->type_after_w1 = UNKNOWN_BT;
998
999 switch (type)
1000 {
1001 case RLE: /* X2 */
1002 case RLO: /* X4 */
1003 bidi_it->type_after_w1 = type;
1004 bidi_check_type (bidi_it->type_after_w1);
1005 type = WEAK_BN; /* X9/Retaining */
1006 if (bidi_it->ignore_bn_limit <= 0)
1007 {
1008 if (current_level <= BIDI_MAXLEVEL - 4)
1009 {
1010 /* Compute the least odd embedding level greater than
1011 the current level. */
1012 new_level = ((current_level + 1) & ~1) + 1;
1013 if (bidi_it->type_after_w1 == RLE)
1014 override = NEUTRAL_DIR;
1015 else
1016 override = R2L;
1017 if (current_level == BIDI_MAXLEVEL - 4)
1018 bidi_it->invalid_rl_levels = 0;
1019 bidi_push_embedding_level (bidi_it, new_level, override);
1020 }
1021 else
1022 {
1023 bidi_it->invalid_levels++;
1024 /* See the commentary about invalid_rl_levels below. */
1025 if (bidi_it->invalid_rl_levels < 0)
1026 bidi_it->invalid_rl_levels = 0;
1027 bidi_it->invalid_rl_levels++;
1028 }
1029 }
1030 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1031 || bidi_it->next_en_pos > bidi_it->charpos)
1032 type = WEAK_EN;
1033 break;
1034 case LRE: /* X3 */
1035 case LRO: /* X5 */
1036 bidi_it->type_after_w1 = type;
1037 bidi_check_type (bidi_it->type_after_w1);
1038 type = WEAK_BN; /* X9/Retaining */
1039 if (bidi_it->ignore_bn_limit <= 0)
1040 {
1041 if (current_level <= BIDI_MAXLEVEL - 5)
1042 {
1043 /* Compute the least even embedding level greater than
1044 the current level. */
1045 new_level = ((current_level + 2) & ~1);
1046 if (bidi_it->type_after_w1 == LRE)
1047 override = NEUTRAL_DIR;
1048 else
1049 override = L2R;
1050 bidi_push_embedding_level (bidi_it, new_level, override);
1051 }
1052 else
1053 {
1054 bidi_it->invalid_levels++;
1055 /* invalid_rl_levels counts invalid levels encountered
1056 while the embedding level was already too high for
1057 LRE/LRO, but not for RLE/RLO. That is because
1058 there may be exactly one PDF which we should not
1059 ignore even though invalid_levels is non-zero.
1060 invalid_rl_levels helps to know what PDF is
1061 that. */
1062 if (bidi_it->invalid_rl_levels >= 0)
1063 bidi_it->invalid_rl_levels++;
1064 }
1065 }
1066 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1067 || bidi_it->next_en_pos > bidi_it->charpos)
1068 type = WEAK_EN;
1069 break;
1070 case PDF: /* X7 */
1071 bidi_it->type_after_w1 = type;
1072 bidi_check_type (bidi_it->type_after_w1);
1073 type = WEAK_BN; /* X9/Retaining */
1074 if (bidi_it->ignore_bn_limit <= 0)
1075 {
1076 if (!bidi_it->invalid_rl_levels)
1077 {
1078 new_level = bidi_pop_embedding_level (bidi_it);
1079 bidi_it->invalid_rl_levels = -1;
1080 if (bidi_it->invalid_levels)
1081 bidi_it->invalid_levels--;
1082 /* else nothing: UAX#9 says to ignore invalid PDFs */
1083 }
1084 if (!bidi_it->invalid_levels)
1085 new_level = bidi_pop_embedding_level (bidi_it);
1086 else
1087 {
1088 bidi_it->invalid_levels--;
1089 bidi_it->invalid_rl_levels--;
1090 }
1091 }
1092 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1093 || bidi_it->next_en_pos > bidi_it->charpos)
1094 type = WEAK_EN;
1095 break;
1096 default:
1097 /* Nothing. */
1098 break;
1099 }
1100
1101 bidi_it->type = type;
1102 bidi_check_type (bidi_it->type);
1103
1104 return new_level;
1105 }
1106
1107 /* Given an iterator state in BIDI_IT, advance one character position
1108 in the buffer/string to the next character (in the logical order),
1109 resolve any explicit embeddings and directional overrides, and
1110 return the embedding level of the character after resolving
1111 explicit directives and ignoring empty embeddings. */
1112 static int
1113 bidi_resolve_explicit (struct bidi_it *bidi_it)
1114 {
1115 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1116 int new_level = bidi_resolve_explicit_1 (bidi_it);
1117
1118 if (prev_level < new_level
1119 && bidi_it->type == WEAK_BN
1120 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1121 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
1122 && bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1123 + bidi_it->ch_len)))
1124 {
1125 /* Avoid pushing and popping embedding levels if the level run
1126 is empty, as this breaks level runs where it shouldn't.
1127 UAX#9 removes all the explicit embedding and override codes,
1128 so empty embeddings disappear without a trace. We need to
1129 behave as if we did the same. */
1130 struct bidi_it saved_it;
1131 int level = prev_level;
1132
1133 bidi_copy_it (&saved_it, bidi_it);
1134
1135 while (bidi_explicit_dir_char (FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1136 + bidi_it->ch_len)))
1137 {
1138 /* This advances to the next character, skipping any
1139 characters covered by display strings. */
1140 level = bidi_resolve_explicit_1 (bidi_it);
1141 }
1142
1143 if (bidi_it->nchars <= 0)
1144 abort ();
1145 if (level == prev_level) /* empty embedding */
1146 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
1147 else /* this embedding is non-empty */
1148 saved_it.ignore_bn_limit = -1;
1149
1150 bidi_copy_it (bidi_it, &saved_it);
1151 if (bidi_it->ignore_bn_limit > 0)
1152 {
1153 /* We pushed a level, but we shouldn't have. Undo that. */
1154 if (!bidi_it->invalid_rl_levels)
1155 {
1156 new_level = bidi_pop_embedding_level (bidi_it);
1157 bidi_it->invalid_rl_levels = -1;
1158 if (bidi_it->invalid_levels)
1159 bidi_it->invalid_levels--;
1160 }
1161 if (!bidi_it->invalid_levels)
1162 new_level = bidi_pop_embedding_level (bidi_it);
1163 else
1164 {
1165 bidi_it->invalid_levels--;
1166 bidi_it->invalid_rl_levels--;
1167 }
1168 }
1169 }
1170
1171 if (bidi_it->type == NEUTRAL_B) /* X8 */
1172 {
1173 bidi_set_paragraph_end (bidi_it);
1174 /* This is needed by bidi_resolve_weak below, and in L1. */
1175 bidi_it->type_after_w1 = bidi_it->type;
1176 bidi_check_type (bidi_it->type_after_w1);
1177 }
1178
1179 return new_level;
1180 }
1181
1182 /* Advance in the buffer/string, resolve weak types and return the
1183 type of the next character after weak type resolution. */
1184 static bidi_type_t
1185 bidi_resolve_weak (struct bidi_it *bidi_it)
1186 {
1187 bidi_type_t type;
1188 bidi_dir_t override;
1189 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1190 int new_level = bidi_resolve_explicit (bidi_it);
1191 int next_char;
1192 bidi_type_t type_of_next;
1193 struct bidi_it saved_it;
1194
1195 type = bidi_it->type;
1196 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1197
1198 if (type == UNKNOWN_BT
1199 || type == LRE
1200 || type == LRO
1201 || type == RLE
1202 || type == RLO
1203 || type == PDF)
1204 abort ();
1205
1206 if (new_level != prev_level
1207 || bidi_it->type == NEUTRAL_B)
1208 {
1209 /* We've got a new embedding level run, compute the directional
1210 type of sor and initialize per-run variables (UAX#9, clause
1211 X10). */
1212 bidi_set_sor_type (bidi_it, prev_level, new_level);
1213 }
1214 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1215 || type == WEAK_BN || type == STRONG_AL)
1216 bidi_it->type_after_w1 = type; /* needed in L1 */
1217 bidi_check_type (bidi_it->type_after_w1);
1218
1219 /* Level and directional override status are already recorded in
1220 bidi_it, and do not need any change; see X6. */
1221 if (override == R2L) /* X6 */
1222 type = STRONG_R;
1223 else if (override == L2R)
1224 type = STRONG_L;
1225 else
1226 {
1227 if (type == WEAK_NSM) /* W1 */
1228 {
1229 /* Note that we don't need to consider the case where the
1230 prev character has its type overridden by an RLO or LRO,
1231 because then either the type of this NSM would have been
1232 also overridden, or the previous character is outside the
1233 current level run, and thus not relevant to this NSM.
1234 This is why NSM gets the type_after_w1 of the previous
1235 character. */
1236 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1237 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1238 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
1239 type = bidi_it->prev.type_after_w1;
1240 else if (bidi_it->sor == R2L)
1241 type = STRONG_R;
1242 else if (bidi_it->sor == L2R)
1243 type = STRONG_L;
1244 else /* shouldn't happen! */
1245 abort ();
1246 }
1247 if (type == WEAK_EN /* W2 */
1248 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1249 type = WEAK_AN;
1250 else if (type == STRONG_AL) /* W3 */
1251 type = STRONG_R;
1252 else if ((type == WEAK_ES /* W4 */
1253 && bidi_it->prev.type_after_w1 == WEAK_EN
1254 && bidi_it->prev.orig_type == WEAK_EN)
1255 || (type == WEAK_CS
1256 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1257 && bidi_it->prev.orig_type == WEAK_EN)
1258 || bidi_it->prev.type_after_w1 == WEAK_AN)))
1259 {
1260 next_char =
1261 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1262 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1263 + bidi_it->ch_len);
1264 type_of_next = bidi_get_type (next_char, override);
1265
1266 if (type_of_next == WEAK_BN
1267 || bidi_explicit_dir_char (next_char))
1268 {
1269 bidi_copy_it (&saved_it, bidi_it);
1270 while (bidi_resolve_explicit (bidi_it) == new_level
1271 && bidi_it->type == WEAK_BN)
1272 ;
1273 type_of_next = bidi_it->type;
1274 bidi_copy_it (bidi_it, &saved_it);
1275 }
1276
1277 /* If the next character is EN, but the last strong-type
1278 character is AL, that next EN will be changed to AN when
1279 we process it in W2 above. So in that case, this ES
1280 should not be changed into EN. */
1281 if (type == WEAK_ES
1282 && type_of_next == WEAK_EN
1283 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1284 type = WEAK_EN;
1285 else if (type == WEAK_CS)
1286 {
1287 if (bidi_it->prev.type_after_w1 == WEAK_AN
1288 && (type_of_next == WEAK_AN
1289 /* If the next character is EN, but the last
1290 strong-type character is AL, EN will be later
1291 changed to AN when we process it in W2 above.
1292 So in that case, this ES should not be
1293 changed into EN. */
1294 || (type_of_next == WEAK_EN
1295 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1296 type = WEAK_AN;
1297 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1298 && type_of_next == WEAK_EN
1299 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1300 type = WEAK_EN;
1301 }
1302 }
1303 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1304 || type == WEAK_BN) /* W5/Retaining */
1305 {
1306 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1307 || bidi_it->next_en_pos > bidi_it->charpos)
1308 type = WEAK_EN;
1309 else /* W5: ET/BN with EN after it. */
1310 {
1311 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
1312
1313 if (bidi_it->nchars <= 0)
1314 abort ();
1315 next_char =
1316 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1317 ? BIDI_EOB : FETCH_MULTIBYTE_CHAR (bidi_it->bytepos
1318 + bidi_it->ch_len);
1319 type_of_next = bidi_get_type (next_char, override);
1320
1321 if (type_of_next == WEAK_ET
1322 || type_of_next == WEAK_BN
1323 || bidi_explicit_dir_char (next_char))
1324 {
1325 bidi_copy_it (&saved_it, bidi_it);
1326 while (bidi_resolve_explicit (bidi_it) == new_level
1327 && (bidi_it->type == WEAK_BN
1328 || bidi_it->type == WEAK_ET))
1329 ;
1330 type_of_next = bidi_it->type;
1331 en_pos = bidi_it->charpos;
1332 bidi_copy_it (bidi_it, &saved_it);
1333 }
1334 if (type_of_next == WEAK_EN)
1335 {
1336 /* If the last strong character is AL, the EN we've
1337 found will become AN when we get to it (W2). */
1338 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1339 {
1340 type = WEAK_EN;
1341 /* Remember this EN position, to speed up processing
1342 of the next ETs. */
1343 bidi_it->next_en_pos = en_pos;
1344 }
1345 else if (type == WEAK_BN)
1346 type = NEUTRAL_ON; /* W6/Retaining */
1347 }
1348 }
1349 }
1350 }
1351
1352 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1353 || (type == WEAK_BN
1354 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1355 || bidi_it->prev.type_after_w1 == WEAK_ES
1356 || bidi_it->prev.type_after_w1 == WEAK_ET)))
1357 type = NEUTRAL_ON;
1358
1359 /* Store the type we've got so far, before we clobber it with strong
1360 types in W7 and while resolving neutral types. But leave alone
1361 the original types that were recorded above, because we will need
1362 them for the L1 clause. */
1363 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1364 bidi_it->type_after_w1 = type;
1365 bidi_check_type (bidi_it->type_after_w1);
1366
1367 if (type == WEAK_EN) /* W7 */
1368 {
1369 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1370 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1371 type = STRONG_L;
1372 }
1373
1374 bidi_it->type = type;
1375 bidi_check_type (bidi_it->type);
1376 return type;
1377 }
1378
1379 static bidi_type_t
1380 bidi_resolve_neutral (struct bidi_it *bidi_it)
1381 {
1382 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1383 bidi_type_t type = bidi_resolve_weak (bidi_it);
1384 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1385
1386 if (!(type == STRONG_R
1387 || type == STRONG_L
1388 || type == WEAK_BN
1389 || type == WEAK_EN
1390 || type == WEAK_AN
1391 || type == NEUTRAL_B
1392 || type == NEUTRAL_S
1393 || type == NEUTRAL_WS
1394 || type == NEUTRAL_ON))
1395 abort ();
1396
1397 if (bidi_get_category (type) == NEUTRAL
1398 || (type == WEAK_BN && prev_level == current_level))
1399 {
1400 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1401 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1402 bidi_it->next_for_neutral.type,
1403 current_level);
1404 else
1405 {
1406 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1407 the assumption of batch-style processing; see clauses W4,
1408 W5, and especially N1, which require to look far forward
1409 (as well as back) in the buffer/string. May the fleas of
1410 a thousand camels infest the armpits of those who design
1411 supposedly general-purpose algorithms by looking at their
1412 own implementations, and fail to consider other possible
1413 implementations! */
1414 struct bidi_it saved_it;
1415 bidi_type_t next_type;
1416
1417 if (bidi_it->scan_dir == -1)
1418 abort ();
1419
1420 bidi_copy_it (&saved_it, bidi_it);
1421 /* Scan the text forward until we find the first non-neutral
1422 character, and then use that to resolve the neutral we
1423 are dealing with now. We also cache the scanned iterator
1424 states, to salvage some of the effort later. */
1425 bidi_cache_iterator_state (bidi_it, 0);
1426 do {
1427 /* Record the info about the previous character, so that
1428 it will be cached below with this state. */
1429 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1430 && bidi_it->type != WEAK_BN)
1431 bidi_remember_char (&bidi_it->prev, bidi_it);
1432 type = bidi_resolve_weak (bidi_it);
1433 /* Paragraph separators have their levels fully resolved
1434 at this point, so cache them as resolved. */
1435 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1436 /* FIXME: implement L1 here, by testing for a newline and
1437 resetting the level for any sequence of whitespace
1438 characters adjacent to it. */
1439 } while (!(type == NEUTRAL_B
1440 || (type != WEAK_BN
1441 && bidi_get_category (type) != NEUTRAL)
1442 /* This is all per level run, so stop when we
1443 reach the end of this level run. */
1444 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1445 current_level));
1446
1447 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1448
1449 switch (type)
1450 {
1451 case STRONG_L:
1452 case STRONG_R:
1453 case STRONG_AL:
1454 next_type = type;
1455 break;
1456 case WEAK_EN:
1457 case WEAK_AN:
1458 /* N1: ``European and Arabic numbers are treated as
1459 though they were R.'' */
1460 next_type = STRONG_R;
1461 saved_it.next_for_neutral.type = STRONG_R;
1462 break;
1463 case WEAK_BN:
1464 if (!bidi_explicit_dir_char (bidi_it->ch))
1465 abort (); /* can't happen: BNs are skipped */
1466 /* FALLTHROUGH */
1467 case NEUTRAL_B:
1468 /* Marched all the way to the end of this level run.
1469 We need to use the eor type, whose information is
1470 stored by bidi_set_sor_type in the prev_for_neutral
1471 member. */
1472 if (saved_it.type != WEAK_BN
1473 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1474 {
1475 next_type = bidi_it->prev_for_neutral.type;
1476 saved_it.next_for_neutral.type = next_type;
1477 bidi_check_type (next_type);
1478 }
1479 else
1480 {
1481 /* This is a BN which does not adjoin neutrals.
1482 Leave its type alone. */
1483 bidi_copy_it (bidi_it, &saved_it);
1484 return bidi_it->type;
1485 }
1486 break;
1487 default:
1488 abort ();
1489 }
1490 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1491 next_type, current_level);
1492 saved_it.type = type;
1493 bidi_check_type (type);
1494 bidi_copy_it (bidi_it, &saved_it);
1495 }
1496 }
1497 return type;
1498 }
1499
1500 /* Given an iterator state in BIDI_IT, advance one character position
1501 in the buffer/string to the next character (in the logical order),
1502 resolve the bidi type of that next character, and return that
1503 type. */
1504 static bidi_type_t
1505 bidi_type_of_next_char (struct bidi_it *bidi_it)
1506 {
1507 bidi_type_t type;
1508
1509 /* This should always be called during a forward scan. */
1510 if (bidi_it->scan_dir != 1)
1511 abort ();
1512
1513 /* Reset the limit until which to ignore BNs if we step out of the
1514 area where we found only empty levels. */
1515 if ((bidi_it->ignore_bn_limit > 0
1516 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1517 || (bidi_it->ignore_bn_limit == -1
1518 && !bidi_explicit_dir_char (bidi_it->ch)))
1519 bidi_it->ignore_bn_limit = 0;
1520
1521 type = bidi_resolve_neutral (bidi_it);
1522
1523 return type;
1524 }
1525
1526 /* Given an iterator state BIDI_IT, advance one character position in
1527 the buffer/string to the next character (in the current scan
1528 direction), resolve the embedding and implicit levels of that next
1529 character, and return the resulting level. */
1530 static int
1531 bidi_level_of_next_char (struct bidi_it *bidi_it)
1532 {
1533 bidi_type_t type;
1534 int level, prev_level = -1;
1535 struct bidi_saved_info next_for_neutral;
1536 EMACS_INT next_char_pos;
1537
1538 if (bidi_it->scan_dir == 1)
1539 {
1540 /* There's no sense in trying to advance if we hit end of text. */
1541 if (bidi_it->bytepos >= ZV_BYTE)
1542 return bidi_it->resolved_level;
1543
1544 /* Record the info about the previous character. */
1545 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1546 && bidi_it->type != WEAK_BN)
1547 bidi_remember_char (&bidi_it->prev, bidi_it);
1548 if (bidi_it->type_after_w1 == STRONG_R
1549 || bidi_it->type_after_w1 == STRONG_L
1550 || bidi_it->type_after_w1 == STRONG_AL)
1551 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1552 /* FIXME: it sounds like we don't need both prev and
1553 prev_for_neutral members, but I'm leaving them both for now. */
1554 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1555 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1556 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1557
1558 /* If we overstepped the characters used for resolving neutrals
1559 and whitespace, invalidate their info in the iterator. */
1560 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1561 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1562 if (bidi_it->next_en_pos >= 0
1563 && bidi_it->charpos >= bidi_it->next_en_pos)
1564 bidi_it->next_en_pos = -1;
1565 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1566 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1567 bidi_it->next_for_ws.type = UNKNOWN_BT;
1568
1569 /* This must be taken before we fill the iterator with the info
1570 about the next char. If we scan backwards, the iterator
1571 state must be already cached, so there's no need to know the
1572 embedding level of the previous character, since we will be
1573 returning to our caller shortly. */
1574 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1575 }
1576 next_for_neutral = bidi_it->next_for_neutral;
1577
1578 /* Perhaps the character we want is already cached. If it is, the
1579 call to bidi_cache_find below will return a type other than
1580 UNKNOWN_BT. */
1581 if (bidi_cache_idx && !bidi_it->first_elt)
1582 {
1583 if (bidi_it->scan_dir > 0)
1584 {
1585 if (bidi_it->nchars <= 0)
1586 abort ();
1587 next_char_pos = bidi_it->charpos + bidi_it->nchars;
1588 }
1589 else
1590 next_char_pos = bidi_it->charpos - 1;
1591 type = bidi_cache_find (next_char_pos, -1, bidi_it);
1592 }
1593 else
1594 type = UNKNOWN_BT;
1595 if (type != UNKNOWN_BT)
1596 {
1597 /* Don't lose the information for resolving neutrals! The
1598 cached states could have been cached before their
1599 next_for_neutral member was computed. If we are on our way
1600 forward, we can simply take the info from the previous
1601 state. */
1602 if (bidi_it->scan_dir == 1
1603 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1604 bidi_it->next_for_neutral = next_for_neutral;
1605
1606 /* If resolved_level is -1, it means this state was cached
1607 before it was completely resolved, so we cannot return
1608 it. */
1609 if (bidi_it->resolved_level != -1)
1610 return bidi_it->resolved_level;
1611 }
1612 if (bidi_it->scan_dir == -1)
1613 /* If we are going backwards, the iterator state is already cached
1614 from previous scans, and should be fully resolved. */
1615 abort ();
1616
1617 if (type == UNKNOWN_BT)
1618 type = bidi_type_of_next_char (bidi_it);
1619
1620 if (type == NEUTRAL_B)
1621 return bidi_it->resolved_level;
1622
1623 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1624 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1625 || (type == WEAK_BN && prev_level == level))
1626 {
1627 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1628 abort ();
1629
1630 /* If the cached state shows a neutral character, it was not
1631 resolved by bidi_resolve_neutral, so do it now. */
1632 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1633 bidi_it->next_for_neutral.type,
1634 level);
1635 }
1636
1637 if (!(type == STRONG_R
1638 || type == STRONG_L
1639 || type == WEAK_BN
1640 || type == WEAK_EN
1641 || type == WEAK_AN))
1642 abort ();
1643 bidi_it->type = type;
1644 bidi_check_type (bidi_it->type);
1645
1646 /* For L1 below, we need to know, for each WS character, whether
1647 it belongs to a sequence of WS characters preceding a newline
1648 or a TAB or a paragraph separator. */
1649 if (bidi_it->orig_type == NEUTRAL_WS
1650 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1651 {
1652 int ch;
1653 EMACS_INT clen = bidi_it->ch_len;
1654 EMACS_INT bpos = bidi_it->bytepos;
1655 EMACS_INT cpos = bidi_it->charpos;
1656 EMACS_INT disp_pos = bidi_it->disp_pos;
1657 EMACS_INT nc = bidi_it->nchars;
1658 bidi_type_t chtype;
1659 int fwp = bidi_it->frame_window_p;
1660
1661 if (bidi_it->nchars <= 0)
1662 abort ();
1663 do {
1664 ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, fwp,
1665 &clen, &nc);
1666 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1667 chtype = NEUTRAL_B;
1668 else
1669 chtype = bidi_get_type (ch, NEUTRAL_DIR);
1670 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1671 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1672 bidi_it->next_for_ws.type = chtype;
1673 bidi_check_type (bidi_it->next_for_ws.type);
1674 bidi_it->next_for_ws.charpos = cpos;
1675 bidi_it->next_for_ws.bytepos = bpos;
1676 }
1677
1678 /* Resolve implicit levels, with a twist: PDFs get the embedding
1679 level of the enbedding they terminate. See below for the
1680 reason. */
1681 if (bidi_it->orig_type == PDF
1682 /* Don't do this if this formatting code didn't change the
1683 embedding level due to invalid or empty embeddings. */
1684 && prev_level != level)
1685 {
1686 /* Don't look in UAX#9 for the reason for this: it's our own
1687 private quirk. The reason is that we want the formatting
1688 codes to be delivered so that they bracket the text of their
1689 embedding. For example, given the text
1690
1691 {RLO}teST{PDF}
1692
1693 we want it to be displayed as
1694
1695 {PDF}STet{RLO}
1696
1697 not as
1698
1699 STet{RLO}{PDF}
1700
1701 which will result because we bump up the embedding level as
1702 soon as we see the RLO and pop it as soon as we see the PDF,
1703 so RLO itself has the same embedding level as "teST", and
1704 thus would be normally delivered last, just before the PDF.
1705 The switch below fiddles with the level of PDF so that this
1706 ugly side effect does not happen.
1707
1708 (This is, of course, only important if the formatting codes
1709 are actually displayed, but Emacs does need to display them
1710 if the user wants to.) */
1711 level = prev_level;
1712 }
1713 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1714 || bidi_it->orig_type == NEUTRAL_S
1715 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1716 /* || bidi_it->ch == LINESEP_CHAR */
1717 || (bidi_it->orig_type == NEUTRAL_WS
1718 && (bidi_it->next_for_ws.type == NEUTRAL_B
1719 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1720 level = bidi_it->level_stack[0].level;
1721 else if ((level & 1) == 0) /* I1 */
1722 {
1723 if (type == STRONG_R)
1724 level++;
1725 else if (type == WEAK_EN || type == WEAK_AN)
1726 level += 2;
1727 }
1728 else /* I2 */
1729 {
1730 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1731 level++;
1732 }
1733
1734 bidi_it->resolved_level = level;
1735 return level;
1736 }
1737
1738 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
1739 non-zero, we are at the end of a level, and we need to prepare to
1740 resume the scan of the lower level.
1741
1742 If this level's other edge is cached, we simply jump to it, filling
1743 the iterator structure with the iterator state on the other edge.
1744 Otherwise, we walk the buffer or string until we come back to the
1745 same level as LEVEL.
1746
1747 Note: we are not talking here about a ``level run'' in the UAX#9
1748 sense of the term, but rather about a ``level'' which includes
1749 all the levels higher than it. In other words, given the levels
1750 like this:
1751
1752 11111112222222333333334443343222222111111112223322111
1753 A B C
1754
1755 and assuming we are at point A scanning left to right, this
1756 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1757 at point B. */
1758 static void
1759 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1760 {
1761 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1762 ptrdiff_t idx;
1763
1764 /* Try the cache first. */
1765 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1766 bidi_cache_fetch_state (idx, bidi_it);
1767 else
1768 {
1769 int new_level;
1770
1771 if (end_flag)
1772 abort (); /* if we are at end of level, its edges must be cached */
1773
1774 bidi_cache_iterator_state (bidi_it, 1);
1775 do {
1776 new_level = bidi_level_of_next_char (bidi_it);
1777 bidi_cache_iterator_state (bidi_it, 1);
1778 } while (new_level >= level);
1779 }
1780 }
1781
1782 void
1783 bidi_move_to_visually_next (struct bidi_it *bidi_it)
1784 {
1785 int old_level, new_level, next_level;
1786 struct bidi_it sentinel;
1787
1788 if (bidi_it->scan_dir == 0)
1789 {
1790 bidi_it->scan_dir = 1; /* default to logical order */
1791 }
1792
1793 /* If we just passed a newline, initialize for the next line. */
1794 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1795 bidi_line_init (bidi_it);
1796
1797 /* Prepare the sentinel iterator state, and cache it. When we bump
1798 into it, scanning backwards, we'll know that the last non-base
1799 level is exhausted. */
1800 if (bidi_cache_idx == 0)
1801 {
1802 bidi_copy_it (&sentinel, bidi_it);
1803 if (bidi_it->first_elt)
1804 {
1805 sentinel.charpos--; /* cached charpos needs to be monotonic */
1806 sentinel.bytepos--;
1807 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1808 sentinel.ch_len = 1;
1809 sentinel.nchars = 1;
1810 }
1811 bidi_cache_iterator_state (&sentinel, 1);
1812 }
1813
1814 old_level = bidi_it->resolved_level;
1815 new_level = bidi_level_of_next_char (bidi_it);
1816
1817 /* Reordering of resolved levels (clause L2) is implemented by
1818 jumping to the other edge of the level and flipping direction of
1819 scanning the text whenever we find a level change. */
1820 if (new_level != old_level)
1821 {
1822 int ascending = new_level > old_level;
1823 int level_to_search = ascending ? old_level + 1 : old_level;
1824 int incr = ascending ? 1 : -1;
1825 int expected_next_level = old_level + incr;
1826
1827 /* Jump (or walk) to the other edge of this level. */
1828 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1829 /* Switch scan direction and peek at the next character in the
1830 new direction. */
1831 bidi_it->scan_dir = -bidi_it->scan_dir;
1832
1833 /* The following loop handles the case where the resolved level
1834 jumps by more than one. This is typical for numbers inside a
1835 run of text with left-to-right embedding direction, but can
1836 also happen in other situations. In those cases the decision
1837 where to continue after a level change, and in what direction,
1838 is tricky. For example, given a text like below:
1839
1840 abcdefgh
1841 11336622
1842
1843 (where the numbers below the text show the resolved levels),
1844 the result of reordering according to UAX#9 should be this:
1845
1846 efdcghba
1847
1848 This is implemented by the loop below which flips direction
1849 and jumps to the other edge of the level each time it finds
1850 the new level not to be the expected one. The expected level
1851 is always one more or one less than the previous one. */
1852 next_level = bidi_peek_at_next_level (bidi_it);
1853 while (next_level != expected_next_level)
1854 {
1855 expected_next_level += incr;
1856 level_to_search += incr;
1857 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1858 bidi_it->scan_dir = -bidi_it->scan_dir;
1859 next_level = bidi_peek_at_next_level (bidi_it);
1860 }
1861
1862 /* Finally, deliver the next character in the new direction. */
1863 next_level = bidi_level_of_next_char (bidi_it);
1864 }
1865
1866 /* Take note when we have just processed the newline that precedes
1867 the end of the paragraph. The next time we are about to be
1868 called, set_iterator_to_next will automatically reinit the
1869 paragraph direction, if needed. We do this at the newline before
1870 the paragraph separator, because the next character might not be
1871 the first character of the next paragraph, due to the bidi
1872 reordering, whereas we _must_ know the paragraph base direction
1873 _before_ we process the paragraph's text, since the base
1874 direction affects the reordering. */
1875 if (bidi_it->scan_dir == 1
1876 && bidi_it->orig_type == NEUTRAL_B
1877 && bidi_it->bytepos < ZV_BYTE)
1878 {
1879 EMACS_INT sep_len =
1880 bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
1881 bidi_it->bytepos + bidi_it->ch_len);
1882 if (bidi_it->nchars <= 0)
1883 abort ();
1884 if (sep_len >= 0)
1885 {
1886 bidi_it->new_paragraph = 1;
1887 /* Record the buffer position of the last character of the
1888 paragraph separator. */
1889 bidi_it->separator_limit =
1890 bidi_it->charpos + bidi_it->nchars + sep_len;
1891 }
1892 }
1893
1894 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1895 {
1896 /* If we are at paragraph's base embedding level and beyond the
1897 last cached position, the cache's job is done and we can
1898 discard it. */
1899 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1900 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
1901 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
1902 bidi_cache_reset ();
1903 /* But as long as we are caching during forward scan, we must
1904 cache each state, or else the cache integrity will be
1905 compromised: it assumes cached states correspond to buffer
1906 positions 1:1. */
1907 else
1908 bidi_cache_iterator_state (bidi_it, 1);
1909 }
1910 }
1911
1912 /* This is meant to be called from within the debugger, whenever you
1913 wish to examine the cache contents. */
1914 void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
1915 void
1916 bidi_dump_cached_states (void)
1917 {
1918 ptrdiff_t i;
1919 int ndigits = 1;
1920
1921 if (bidi_cache_idx == 0)
1922 {
1923 fprintf (stderr, "The cache is empty.\n");
1924 return;
1925 }
1926 fprintf (stderr, "Total of %d state%s in cache:\n",
1927 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1928
1929 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1930 ndigits++;
1931 fputs ("ch ", stderr);
1932 for (i = 0; i < bidi_cache_idx; i++)
1933 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1934 fputs ("\n", stderr);
1935 fputs ("lvl ", stderr);
1936 for (i = 0; i < bidi_cache_idx; i++)
1937 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1938 fputs ("\n", stderr);
1939 fputs ("pos ", stderr);
1940 for (i = 0; i < bidi_cache_idx; i++)
1941 fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
1942 fputs ("\n", stderr);
1943 }