Derive bidi type table from UnicodeData.txt.
[bpt/emacs.git] / src / bidi.c
CommitLineData
b7b65b15 1/* Low-level bidirectional buffer-scanning functions for GNU Emacs.
b118e65f
EZ
2 Copyright (C) 2000, 2001, 2004, 2005, 2009, 2010
3 Free Software Foundation, Inc.
b7b65b15
EZ
4
5This file is part of GNU Emacs.
6
a8d11bd3 7GNU Emacs is free software: you can redistribute it and/or modify
b7b65b15 8it under the terms of the GNU General Public License as published by
a8d11bd3
EZ
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
b7b65b15
EZ
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
b7b65b15 17You should have received a copy of the GNU General Public License
a8d11bd3 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
b7b65b15 19
2d6e4628
EZ
20/* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
b7b65b15
EZ
23 as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the reference and most other implementations, this one is
940afb59
EZ
26 designed to be called once for every character in the buffer or
27 string.
b7b65b15 28
4b292a22 29 The main entry point is bidi_move_to_visually_next. Each time it
b7b65b15
EZ
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
4b292a22
EZ
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
b7b65b15
EZ
36 character by resolving their levels on the fly.
37
940afb59
EZ
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
89d3374a
EZ
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
46
b7b65b15
EZ
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
51
52#ifdef HAVE_CONFIG_H
53#include <config.h>
54#endif
55
56#include <stdio.h>
57
58#ifdef HAVE_STRING_H
59#include <string.h>
60#endif
61
29e3d8d1
EZ
62#include <setjmp.h>
63
b7b65b15
EZ
64#include "lisp.h"
65#include "buffer.h"
66#include "character.h"
67#include "dispextern.h"
68
69static int bidi_initialized = 0;
70
71static Lisp_Object bidi_type_table;
72
e7402cb2 73/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
b7b65b15
EZ
74#define LRM_CHAR 0x200E
75#define RLM_CHAR 0x200F
76#define LRE_CHAR 0x202A
77#define RLE_CHAR 0x202B
78#define PDF_CHAR 0x202C
79#define LRO_CHAR 0x202D
80#define RLO_CHAR 0x202E
81
b7b65b15 82#define BIDI_EOB -1
e7402cb2 83#define BIDI_BOB -2 /* FIXME: Is this needed? */
b7b65b15 84
b7b65b15
EZ
85/* Local data structures. (Look in dispextern.h for the rest.) */
86
87/* What we need to know about the current paragraph. */
88struct bidi_paragraph_info {
89 int start_bytepos; /* byte position where it begins */
90 int end_bytepos; /* byte position where it ends */
91 int embedding_level; /* its basic embedding level */
92 bidi_dir_t base_dir; /* its base direction */
93};
94
95/* Data type for describing the bidirectional character categories. */
96typedef enum {
97 UNKNOWN_BC,
98 NEUTRAL,
99 WEAK,
100 STRONG
101} bidi_category_t;
102
103int bidi_ignore_explicit_marks_for_paragraph_level = 1;
104
372b7a95 105static Lisp_Object paragraph_start_re, paragraph_separate_re;
6bff6497 106static Lisp_Object Qparagraph_start, Qparagraph_separate;
b7b65b15 107
b7b65b15
EZ
108static void
109bidi_initialize ()
110{
317fbf33
EZ
111
112#include "biditype.h"
113
b7b65b15
EZ
114 int i;
115
116 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
2d6e4628 117 staticpro (&bidi_type_table);
b7b65b15
EZ
118
119 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
317fbf33 120 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
b7b65b15 121 make_number (bidi_type[i].type));
6bff6497 122
b4bf28b7
SM
123 Qparagraph_start = intern ("paragraph-start");
124 staticpro (&Qparagraph_start);
372b7a95
EZ
125 paragraph_start_re = Fsymbol_value (Qparagraph_start);
126 if (!STRINGP (paragraph_start_re))
127 paragraph_start_re = build_string ("\f\\|[ \t]*$");
128 staticpro (&paragraph_start_re);
b4bf28b7
SM
129 Qparagraph_separate = intern ("paragraph-separate");
130 staticpro (&Qparagraph_separate);
372b7a95
EZ
131 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
132 if (!STRINGP (paragraph_separate_re))
133 paragraph_separate_re = build_string ("[ \t\f]*$");
134 staticpro (&paragraph_separate_re);
b7b65b15
EZ
135 bidi_initialized = 1;
136}
137
6bff6497
EZ
138/* Return the bidi type of a character CH, subject to the current
139 directional OVERRIDE. */
fd3998ff 140static INLINE bidi_type_t
6bff6497 141bidi_get_type (int ch, bidi_dir_t override)
b7b65b15 142{
6bff6497
EZ
143 bidi_type_t default_type;
144
e7402cb2
EZ
145 if (ch == BIDI_EOB)
146 return NEUTRAL_B;
e342a24d
EZ
147 if (ch < 0 || ch > MAX_CHAR)
148 abort ();
6bff6497
EZ
149
150 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
151
152 if (override == NEUTRAL_DIR)
153 return default_type;
154
155 switch (default_type)
156 {
157 /* Although UAX#9 does not tell, it doesn't make sense to
158 override NEUTRAL_B and LRM/RLM characters. */
159 case NEUTRAL_B:
160 case LRE:
161 case LRO:
162 case RLE:
163 case RLO:
164 case PDF:
165 return default_type;
166 default:
167 switch (ch)
168 {
169 case LRM_CHAR:
170 case RLM_CHAR:
171 return default_type;
172 default:
173 if (override == L2R) /* X6 */
174 return STRONG_L;
175 else if (override == R2L)
176 return STRONG_R;
177 else
178 abort (); /* can't happen: handled above */
179 }
180 }
b7b65b15
EZ
181}
182
2d6e4628
EZ
183void
184bidi_check_type (bidi_type_t type)
185{
186 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
187 abort ();
188}
189
b7b65b15 190/* Given a bidi TYPE of a character, return its category. */
fd3998ff 191static INLINE bidi_category_t
b7b65b15
EZ
192bidi_get_category (bidi_type_t type)
193{
194 switch (type)
195 {
196 case UNKNOWN_BT:
197 return UNKNOWN_BC;
198 case STRONG_L:
199 case STRONG_R:
200 case STRONG_AL:
201 case LRE:
202 case LRO:
203 case RLE:
204 case RLO:
205 return STRONG;
206 case PDF: /* ??? really?? */
207 case WEAK_EN:
208 case WEAK_ES:
209 case WEAK_ET:
210 case WEAK_AN:
211 case WEAK_CS:
212 case WEAK_NSM:
213 case WEAK_BN:
214 return WEAK;
215 case NEUTRAL_B:
216 case NEUTRAL_S:
217 case NEUTRAL_WS:
218 case NEUTRAL_ON:
219 return NEUTRAL;
220 default:
221 abort ();
222 }
223}
224
e342a24d
EZ
225/* Return the mirrored character of C, if any.
226
227 Note: The conditions in UAX#9 clause L4 must be tested by the
228 caller. */
b7b65b15
EZ
229/* FIXME: exceedingly temporary! Should consult the Unicode database
230 of character properties. */
231int
232bidi_mirror_char (int c)
233{
234 static const char mirrored_pairs[] = "()<>[]{}";
9443b3c7 235 const char *p = c > 0 && c < 128 ? strchr (mirrored_pairs, c) : NULL;
b7b65b15
EZ
236
237 if (p)
238 {
239 size_t i = p - mirrored_pairs;
240
9443b3c7 241 return mirrored_pairs [(i ^ 1)];
b7b65b15
EZ
242 }
243 return c;
244}
245
246/* Copy the bidi iterator from FROM to TO. To save cycles, this only
247 copies the part of the level stack that is actually in use. */
fd3998ff 248static INLINE void
b7b65b15
EZ
249bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
250{
251 int i;
252
e69a9370
EZ
253 /* Copy everything except the level stack and beyond. */
254 memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
b7b65b15
EZ
255
256 /* Copy the active part of the level stack. */
257 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
258 for (i = 1; i <= from->stack_idx; i++)
259 to->level_stack[i] = from->level_stack[i];
260}
261
262/* Caching the bidi iterator states. */
263
2fe72643
EZ
264#define BIDI_CACHE_CHUNK 200
265static struct bidi_it *bidi_cache;
266static size_t bidi_cache_size = 0;
0416466c 267static size_t elsz = sizeof (struct bidi_it);
2fe72643
EZ
268static int bidi_cache_idx; /* next unused cache slot */
269static int bidi_cache_last_idx; /* slot of last cache hit */
b7b65b15 270
fd3998ff 271static INLINE void
b7b65b15
EZ
272bidi_cache_reset (void)
273{
274 bidi_cache_idx = 0;
275 bidi_cache_last_idx = -1;
276}
277
2fe72643
EZ
278static INLINE void
279bidi_cache_shrink (void)
280{
281 if (bidi_cache_size > BIDI_CACHE_CHUNK)
282 {
0416466c
EZ
283 bidi_cache_size = BIDI_CACHE_CHUNK;
284 bidi_cache =
285 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643
EZ
286 }
287 bidi_cache_reset ();
288}
289
fd3998ff 290static INLINE void
b7b65b15
EZ
291bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
292{
293 int current_scan_dir = bidi_it->scan_dir;
294
295 if (idx < 0 || idx >= bidi_cache_idx)
296 abort ();
297
298 bidi_copy_it (bidi_it, &bidi_cache[idx]);
299 bidi_it->scan_dir = current_scan_dir;
300 bidi_cache_last_idx = idx;
301}
302
303/* Find a cached state with a given CHARPOS and resolved embedding
304 level less or equal to LEVEL. if LEVEL is -1, disregard the
305 resolved levels in cached states. DIR, if non-zero, means search
306 in that direction from the last cache hit. */
fd3998ff 307static INLINE int
b7b65b15
EZ
308bidi_cache_search (int charpos, int level, int dir)
309{
310 int i, i_start;
311
312 if (bidi_cache_idx)
313 {
314 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
315 dir = -1;
316 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
317 dir = 1;
318 if (dir)
319 i_start = bidi_cache_last_idx;
320 else
321 {
322 dir = -1;
323 i_start = bidi_cache_idx - 1;
324 }
325
326 if (dir < 0)
327 {
328 /* Linear search for now; FIXME! */
329 for (i = i_start; i >= 0; i--)
330 if (bidi_cache[i].charpos == charpos
331 && (level == -1 || bidi_cache[i].resolved_level <= level))
332 return i;
333 }
334 else
335 {
336 for (i = i_start; i < bidi_cache_idx; i++)
337 if (bidi_cache[i].charpos == charpos
338 && (level == -1 || bidi_cache[i].resolved_level <= level))
339 return i;
340 }
341 }
342
343 return -1;
344}
345
346/* Find a cached state where the resolved level changes to a value
347 that is lower than LEVEL, and return its cache slot index. DIR is
348 the direction to search, starting with the last used cache slot.
349 BEFORE, if non-zero, means return the index of the slot that is
350 ``before'' the level change in the search direction. That is,
351 given the cached levels like this:
352
353 1122333442211
354 AB C
355
356 and assuming we are at the position cached at the slot marked with
357 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
358 index of slot B or A, depending whether BEFORE is, respectively,
359 non-zero or zero. */
360static int
361bidi_cache_find_level_change (int level, int dir, int before)
362{
363 if (bidi_cache_idx)
364 {
365 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
366 int incr = before ? 1 : 0;
367
368 if (!dir)
369 dir = -1;
370 else if (!incr)
371 i += dir;
372
373 if (dir < 0)
374 {
375 while (i >= incr)
376 {
377 if (bidi_cache[i - incr].resolved_level >= 0
378 && bidi_cache[i - incr].resolved_level < level)
379 return i;
380 i--;
381 }
382 }
383 else
384 {
385 while (i < bidi_cache_idx - incr)
386 {
387 if (bidi_cache[i + incr].resolved_level >= 0
388 && bidi_cache[i + incr].resolved_level < level)
389 return i;
390 i++;
391 }
392 }
393 }
394
395 return -1;
396}
397
fd3998ff 398static INLINE void
b7b65b15
EZ
399bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
400{
401 int idx;
402
403 /* We should never cache on backward scans. */
404 if (bidi_it->scan_dir == -1)
405 abort ();
406 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
407
408 if (idx < 0)
409 {
410 idx = bidi_cache_idx;
2fe72643
EZ
411 /* Enlarge the cache as needed. */
412 if (idx >= bidi_cache_size)
413 {
0416466c 414 bidi_cache_size += BIDI_CACHE_CHUNK;
2fe72643 415 bidi_cache =
0416466c 416 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643 417 }
bd924a5d
EZ
418 /* Character positions should correspond to cache positions 1:1.
419 If we are outside the range of cached positions, the cache is
420 useless and must be reset. */
421 if (idx > 0 &&
422 (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
423 || bidi_it->charpos < bidi_cache[0].charpos))
424 {
425 bidi_cache_reset ();
426 idx = 0;
427 }
b7b65b15
EZ
428 bidi_copy_it (&bidi_cache[idx], bidi_it);
429 if (!resolved)
430 bidi_cache[idx].resolved_level = -1;
431 }
432 else
433 {
434 /* Copy only the members which could have changed, to avoid
435 costly copying of the entire struct. */
436 bidi_cache[idx].type = bidi_it->type;
2d6e4628 437 bidi_check_type (bidi_it->type);
89d3374a
EZ
438 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
439 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
440 if (resolved)
441 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
442 else
443 bidi_cache[idx].resolved_level = -1;
444 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
445 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
446 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
447 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
448 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
449 }
450
451 bidi_cache_last_idx = idx;
452 if (idx >= bidi_cache_idx)
453 bidi_cache_idx = idx + 1;
454}
455
fd3998ff 456static INLINE bidi_type_t
b7b65b15
EZ
457bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
458{
459 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
460
461 if (i >= 0)
462 {
463 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
464
5009f85e 465 bidi_copy_it (bidi_it, &bidi_cache[i]);
b7b65b15
EZ
466 bidi_cache_last_idx = i;
467 /* Don't let scan direction from from the cached state override
468 the current scan direction. */
469 bidi_it->scan_dir = current_scan_dir;
470 return bidi_it->type;
471 }
472
473 return UNKNOWN_BT;
474}
475
fd3998ff 476static INLINE int
b7b65b15
EZ
477bidi_peek_at_next_level (struct bidi_it *bidi_it)
478{
479 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
480 abort ();
481 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
482}
483
be39f003
EZ
484/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
485 Value is the non-negative length of the paragraph separator
486 following the buffer position, -1 if position is at the beginning
487 of a new paragraph, or -2 if position is neither at beginning nor
488 at end of a paragraph. */
fd3998ff 489static EMACS_INT
6bff6497 490bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
b7b65b15 491{
b4bf28b7 492 /* FIXME: Why Fbuffer_local_value rather than just Fsymbol_value? */
372b7a95
EZ
493 Lisp_Object sep_re;
494 Lisp_Object start_re;
be39f003
EZ
495 EMACS_INT val;
496
372b7a95
EZ
497 sep_re = paragraph_separate_re;
498 start_re = paragraph_start_re;
be39f003
EZ
499
500 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
501 if (val < 0)
502 {
503 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
504 val = -1;
505 else
506 val = -2;
507 }
b7b65b15 508
be39f003 509 return val;
b7b65b15
EZ
510}
511
512/* Determine the start-of-run (sor) directional type given the two
513 embedding levels on either side of the run boundary. Also, update
514 the saved info about previously seen characters, since that info is
515 generally valid for a single level run. */
fd3998ff 516static INLINE void
b7b65b15
EZ
517bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
518{
519 int higher_level = level_before > level_after ? level_before : level_after;
520
521 /* The prev_was_pdf gork is required for when we have several PDFs
522 in a row. In that case, we want to compute the sor type for the
523 next level run only once: when we see the first PDF. That's
524 because the sor type depends only on the higher of the two levels
525 that we find on the two sides of the level boundary (see UAX#9,
526 clause X10), and so we don't need to know the final embedding
527 level to which we descend after processing all the PDFs. */
e342a24d 528 if (!bidi_it->prev_was_pdf || level_before < level_after)
b7b65b15
EZ
529 /* FIXME: should the default sor direction be user selectable? */
530 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
531 if (level_before > level_after)
532 bidi_it->prev_was_pdf = 1;
533
534 bidi_it->prev.type = UNKNOWN_BT;
89d3374a
EZ
535 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
536 bidi_it->last_strong.orig_type = UNKNOWN_BT;
b7b65b15
EZ
537 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
538 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
539 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
89d3374a
EZ
540 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
541 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15
EZ
542 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
543}
544
6bff6497 545static void
be39f003
EZ
546bidi_line_init (struct bidi_it *bidi_it)
547{
548 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
549 bidi_it->resolved_level = bidi_it->level_stack[0].level;
550 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
551 bidi_it->invalid_levels = 0;
552 bidi_it->invalid_rl_levels = -1;
553 bidi_it->next_en_pos = -1;
554 bidi_it->next_for_ws.type = UNKNOWN_BT;
b44d9321
EZ
555 bidi_set_sor_type (bidi_it,
556 bidi_it->paragraph_dir == R2L ? 1 : 0,
be39f003
EZ
557 bidi_it->level_stack[0].level); /* X10 */
558
559 bidi_cache_reset ();
560}
561
562/* Find the beginning of this paragraph by looking back in the buffer.
563 Value is the byte position of the paragraph's beginning. */
564static EMACS_INT
b44d9321 565bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
6bff6497 566{
372b7a95 567 Lisp_Object re = paragraph_start_re;
6bff6497
EZ
568 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
569
6bff6497
EZ
570 while (pos_byte > BEGV_BYTE
571 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
572 {
be39f003
EZ
573 pos = find_next_newline_no_quit (pos - 1, -1);
574 pos_byte = CHAR_TO_BYTE (pos);
6bff6497 575 }
be39f003 576 return pos_byte;
6bff6497
EZ
577}
578
be39f003 579/* Determine the direction, a.k.a. base embedding level, of the
b44d9321
EZ
580 paragraph we are about to iterate through. If DIR is either L2R or
581 R2L, just use that. Otherwise, determine the paragraph direction
582 from the first strong character of the paragraph.
583
584 Note that this gives the paragraph separator the same direction as
585 the preceding paragraph, even though Emacs generally views the
586 separartor as not belonging to any paragraph. */
b7b65b15
EZ
587void
588bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
589{
6bff6497 590 EMACS_INT bytepos = bidi_it->bytepos;
e7402cb2 591
5e65aec0
EZ
592 /* Special case for an empty buffer. */
593 if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
594 dir = L2R;
9c82e145 595 /* We should never be called at EOB or before BEGV. */
5e65aec0 596 else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
9c82e145
EZ
597 abort ();
598
be39f003
EZ
599 if (dir == L2R)
600 {
601 bidi_it->paragraph_dir = L2R;
602 bidi_it->new_paragraph = 0;
603 }
604 else if (dir == R2L)
605 {
606 bidi_it->paragraph_dir = R2L;
607 bidi_it->new_paragraph = 0;
608 }
b7b65b15
EZ
609 else if (dir == NEUTRAL_DIR) /* P2 */
610 {
6bff6497
EZ
611 int ch, ch_len;
612 EMACS_INT pos;
613 bidi_type_t type;
be39f003 614
d20e1419
EZ
615 if (!bidi_initialized)
616 bidi_initialize ();
617
be39f003
EZ
618 /* If we are inside a paragraph separator, we are just waiting
619 for the separator to be exhausted; use the previous paragraph
e5a2fec7
EZ
620 direction. But don't do that if we have been just reseated,
621 because we need to reinitialize below in that case. */
622 if (!bidi_it->first_elt
623 && bidi_it->charpos < bidi_it->separator_limit)
be39f003
EZ
624 return;
625
b44d9321 626 /* If we are on a newline, get past it to where the next
5e65aec0
EZ
627 paragraph might start. But don't do that at BEGV since then
628 we are potentially in a new paragraph that doesn't yet
629 exist. */
c143c213 630 pos = bidi_it->charpos;
5e65aec0 631 if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
be39f003 632 {
b44d9321 633 bytepos++;
c143c213 634 pos++;
be39f003 635 }
b44d9321
EZ
636
637 /* We are either at the beginning of a paragraph or in the
638 middle of it. Find where this paragraph starts. */
639 bytepos = bidi_find_paragraph_start (pos, bytepos);
6bff6497 640
be39f003
EZ
641 bidi_it->separator_limit = -1;
642 bidi_it->new_paragraph = 0;
6bff6497
EZ
643 ch = FETCH_CHAR (bytepos);
644 ch_len = CHAR_BYTES (ch);
be39f003 645 pos = BYTE_TO_CHAR (bytepos);
6bff6497 646 type = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15 647
e342a24d 648 for (pos++, bytepos += ch_len;
b7b65b15
EZ
649 /* NOTE: UAX#9 says to search only for L, AL, or R types of
650 characters, and ignore RLE, RLO, LRE, and LRO. However,
651 I'm not sure it makes sense to omit those 4; should try
652 with and without that to see the effect. */
653 (bidi_get_category (type) != STRONG)
654 || (bidi_ignore_explicit_marks_for_paragraph_level
655 && (type == RLE || type == RLO
656 || type == LRE || type == LRO));
6bff6497 657 type = bidi_get_type (ch, NEUTRAL_DIR))
b7b65b15 658 {
be39f003 659 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
b7b65b15 660 break;
21fce5ab
EZ
661 if (bytepos >= ZV_BYTE)
662 {
663 /* Pretend there's a paragraph separator at end of buffer. */
664 type = NEUTRAL_B;
665 break;
666 }
b7b65b15
EZ
667 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
668 }
669 if (type == STRONG_R || type == STRONG_AL) /* P3 */
be39f003
EZ
670 bidi_it->paragraph_dir = R2L;
671 else if (type == STRONG_L)
672 bidi_it->paragraph_dir = L2R;
b7b65b15 673 }
be39f003
EZ
674 else
675 abort ();
b7b65b15 676
b44d9321
EZ
677 /* Contrary to UAX#9 clause P3, we only default the paragraph
678 direction to L2R if we have no previous usable paragraph
679 direction. */
d20e1419 680 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
b44d9321 681 bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
be39f003 682 if (bidi_it->paragraph_dir == R2L)
b44d9321 683 bidi_it->level_stack[0].level = 1;
be39f003 684 else
b44d9321 685 bidi_it->level_stack[0].level = 0;
be39f003
EZ
686
687 bidi_line_init (bidi_it);
b7b65b15
EZ
688}
689
6bff6497
EZ
690/* Do whatever UAX#9 clause X8 says should be done at paragraph's
691 end. */
fd3998ff 692static INLINE void
b7b65b15
EZ
693bidi_set_paragraph_end (struct bidi_it *bidi_it)
694{
695 bidi_it->invalid_levels = 0;
696 bidi_it->invalid_rl_levels = -1;
697 bidi_it->stack_idx = 0;
698 bidi_it->resolved_level = bidi_it->level_stack[0].level;
b7b65b15
EZ
699}
700
89d3374a 701/* Initialize the bidi iterator from buffer position CHARPOS. */
b7b65b15 702void
6bff6497 703bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
b7b65b15
EZ
704{
705 if (! bidi_initialized)
706 bidi_initialize ();
89d3374a
EZ
707 bidi_it->charpos = charpos;
708 bidi_it->bytepos = bytepos;
6bff6497
EZ
709 bidi_it->first_elt = 1;
710 bidi_set_paragraph_end (bidi_it);
711 bidi_it->new_paragraph = 1;
be39f003 712 bidi_it->separator_limit = -1;
b7b65b15 713 bidi_it->type = NEUTRAL_B;
ebb5722e
EZ
714 bidi_it->type_after_w1 = NEUTRAL_B;
715 bidi_it->orig_type = NEUTRAL_B;
b7b65b15 716 bidi_it->prev_was_pdf = 0;
ebb5722e
EZ
717 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
718 bidi_it->prev.orig_type = UNKNOWN_BT;
89d3374a
EZ
719 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
720 bidi_it->last_strong.orig_type = UNKNOWN_BT;
b7b65b15
EZ
721 bidi_it->next_for_neutral.charpos = -1;
722 bidi_it->next_for_neutral.type =
89d3374a
EZ
723 bidi_it->next_for_neutral.type_after_w1 =
724 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15
EZ
725 bidi_it->prev_for_neutral.charpos = -1;
726 bidi_it->prev_for_neutral.type =
89d3374a
EZ
727 bidi_it->prev_for_neutral.type_after_w1 =
728 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15 729 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
2fe72643 730 bidi_cache_shrink ();
b7b65b15
EZ
731}
732
733/* Push the current embedding level and override status; reset the
734 current level to LEVEL and the current override status to OVERRIDE. */
fd3998ff 735static INLINE void
b7b65b15
EZ
736bidi_push_embedding_level (struct bidi_it *bidi_it,
737 int level, bidi_dir_t override)
738{
739 bidi_it->stack_idx++;
740 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
741 abort ();
742 bidi_it->level_stack[bidi_it->stack_idx].level = level;
743 bidi_it->level_stack[bidi_it->stack_idx].override = override;
744}
745
746/* Pop the embedding level and directional override status from the
747 stack, and return the new level. */
fd3998ff 748static INLINE int
b7b65b15
EZ
749bidi_pop_embedding_level (struct bidi_it *bidi_it)
750{
751 /* UAX#9 says to ignore invalid PDFs. */
752 if (bidi_it->stack_idx > 0)
753 bidi_it->stack_idx--;
754 return bidi_it->level_stack[bidi_it->stack_idx].level;
755}
756
757/* Record in SAVED_INFO the information about the current character. */
fd3998ff 758static INLINE void
b7b65b15
EZ
759bidi_remember_char (struct bidi_saved_info *saved_info,
760 struct bidi_it *bidi_it)
761{
762 saved_info->charpos = bidi_it->charpos;
763 saved_info->bytepos = bidi_it->bytepos;
764 saved_info->type = bidi_it->type;
2d6e4628 765 bidi_check_type (bidi_it->type);
89d3374a
EZ
766 saved_info->type_after_w1 = bidi_it->type_after_w1;
767 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 768 saved_info->orig_type = bidi_it->orig_type;
2d6e4628 769 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
770}
771
772/* Resolve the type of a neutral character according to the type of
773 surrounding strong text and the current embedding level. */
fd3998ff 774static INLINE bidi_type_t
b7b65b15
EZ
775bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
776{
777 /* N1: European and Arabic numbers are treated as though they were R. */
778 if (next_type == WEAK_EN || next_type == WEAK_AN)
779 next_type = STRONG_R;
780 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
781 prev_type = STRONG_R;
782
783 if (next_type == prev_type) /* N1 */
784 return next_type;
785 else if ((lev & 1) == 0) /* N2 */
786 return STRONG_L;
787 else
788 return STRONG_R;
789}
790
fd3998ff 791static INLINE int
b7b65b15
EZ
792bidi_explicit_dir_char (int c)
793{
794 /* FIXME: this should be replaced with a lookup table with suitable
795 bits set, like standard C ctype macros do. */
796 return (c == LRE_CHAR || c == LRO_CHAR
797 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
798}
799
800/* A helper function for bidi_resolve_explicit. It advances to the
801 next character in logical order and determines the new embedding
802 level and directional override, but does not take into account
803 empty embeddings. */
804static int
805bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
806{
807 int curchar;
808 bidi_type_t type;
809 int current_level;
810 int new_level;
811 bidi_dir_t override;
812
9c82e145
EZ
813 if (bidi_it->bytepos < BEGV_BYTE /* after reseat to BEGV? */
814 || bidi_it->first_elt)
e7402cb2 815 {
9c82e145
EZ
816 bidi_it->first_elt = 0;
817 if (bidi_it->charpos < BEGV)
818 bidi_it->charpos = BEGV;
819 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
e7402cb2 820 }
9c82e145 821 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
b7b65b15
EZ
822 {
823 bidi_it->charpos++;
e7402cb2
EZ
824 if (bidi_it->ch_len == 0)
825 abort ();
b7b65b15
EZ
826 bidi_it->bytepos += bidi_it->ch_len;
827 }
828
829 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
830 override = bidi_it->level_stack[bidi_it->stack_idx].override;
831 new_level = current_level;
832
833 /* in case it is a unibyte character (not yet implemented) */
834 /* _fetch_multibyte_char_len = 1; */
e7402cb2
EZ
835 if (bidi_it->bytepos >= ZV_BYTE)
836 {
837 curchar = BIDI_EOB;
838 bidi_it->ch_len = 1;
839 }
840 else
841 {
842 curchar = FETCH_CHAR (bidi_it->bytepos);
843 bidi_it->ch_len = CHAR_BYTES (curchar);
844 }
b7b65b15 845 bidi_it->ch = curchar;
b7b65b15 846
6bff6497
EZ
847 /* Don't apply directional override here, as all the types we handle
848 below will not be affected by the override anyway, and we need
849 the original type unaltered. The override will be applied in
850 bidi_resolve_weak. */
851 type = bidi_get_type (curchar, NEUTRAL_DIR);
89d3374a
EZ
852 bidi_it->orig_type = type;
853 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
854
855 if (type != PDF)
856 bidi_it->prev_was_pdf = 0;
857
89d3374a 858 bidi_it->type_after_w1 = UNKNOWN_BT;
b7b65b15
EZ
859
860 switch (type)
861 {
862 case RLE: /* X2 */
863 case RLO: /* X4 */
89d3374a
EZ
864 bidi_it->type_after_w1 = type;
865 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
866 type = WEAK_BN; /* X9/Retaining */
867 if (bidi_it->ignore_bn_limit <= 0)
868 {
869 if (current_level <= BIDI_MAXLEVEL - 4)
870 {
871 /* Compute the least odd embedding level greater than
872 the current level. */
873 new_level = ((current_level + 1) & ~1) + 1;
89d3374a 874 if (bidi_it->type_after_w1 == RLE)
b7b65b15
EZ
875 override = NEUTRAL_DIR;
876 else
877 override = R2L;
878 if (current_level == BIDI_MAXLEVEL - 4)
879 bidi_it->invalid_rl_levels = 0;
880 bidi_push_embedding_level (bidi_it, new_level, override);
881 }
882 else
883 {
884 bidi_it->invalid_levels++;
885 /* See the commentary about invalid_rl_levels below. */
886 if (bidi_it->invalid_rl_levels < 0)
887 bidi_it->invalid_rl_levels = 0;
888 bidi_it->invalid_rl_levels++;
889 }
890 }
89d3374a 891 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
892 || bidi_it->next_en_pos > bidi_it->charpos)
893 type = WEAK_EN;
894 break;
895 case LRE: /* X3 */
896 case LRO: /* X5 */
89d3374a
EZ
897 bidi_it->type_after_w1 = type;
898 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
899 type = WEAK_BN; /* X9/Retaining */
900 if (bidi_it->ignore_bn_limit <= 0)
901 {
902 if (current_level <= BIDI_MAXLEVEL - 5)
903 {
904 /* Compute the least even embedding level greater than
905 the current level. */
906 new_level = ((current_level + 2) & ~1);
89d3374a 907 if (bidi_it->type_after_w1 == LRE)
b7b65b15
EZ
908 override = NEUTRAL_DIR;
909 else
910 override = L2R;
911 bidi_push_embedding_level (bidi_it, new_level, override);
912 }
913 else
914 {
915 bidi_it->invalid_levels++;
916 /* invalid_rl_levels counts invalid levels encountered
917 while the embedding level was already too high for
918 LRE/LRO, but not for RLE/RLO. That is because
919 there may be exactly one PDF which we should not
920 ignore even though invalid_levels is non-zero.
921 invalid_rl_levels helps to know what PDF is
922 that. */
923 if (bidi_it->invalid_rl_levels >= 0)
924 bidi_it->invalid_rl_levels++;
925 }
926 }
89d3374a 927 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
928 || bidi_it->next_en_pos > bidi_it->charpos)
929 type = WEAK_EN;
930 break;
931 case PDF: /* X7 */
89d3374a
EZ
932 bidi_it->type_after_w1 = type;
933 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
934 type = WEAK_BN; /* X9/Retaining */
935 if (bidi_it->ignore_bn_limit <= 0)
936 {
937 if (!bidi_it->invalid_rl_levels)
938 {
939 new_level = bidi_pop_embedding_level (bidi_it);
940 bidi_it->invalid_rl_levels = -1;
941 if (bidi_it->invalid_levels)
942 bidi_it->invalid_levels--;
943 /* else nothing: UAX#9 says to ignore invalid PDFs */
944 }
945 if (!bidi_it->invalid_levels)
946 new_level = bidi_pop_embedding_level (bidi_it);
947 else
948 {
949 bidi_it->invalid_levels--;
950 bidi_it->invalid_rl_levels--;
951 }
952 }
89d3374a 953 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
954 || bidi_it->next_en_pos > bidi_it->charpos)
955 type = WEAK_EN;
956 break;
957 default:
958 /* Nothing. */
959 break;
960 }
961
962 bidi_it->type = type;
2d6e4628 963 bidi_check_type (bidi_it->type);
b7b65b15
EZ
964
965 return new_level;
966}
967
968/* Given an iterator state in BIDI_IT, advance one character position
969 in the buffer to the next character (in the logical order), resolve
970 any explicit embeddings and directional overrides, and return the
971 embedding level of the character after resolving explicit
972 directives and ignoring empty embeddings. */
973static int
974bidi_resolve_explicit (struct bidi_it *bidi_it)
975{
976 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
977 int new_level = bidi_resolve_explicit_1 (bidi_it);
978
979 if (prev_level < new_level
980 && bidi_it->type == WEAK_BN
981 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1502b819 982 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
b7b65b15
EZ
983 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
984 + bidi_it->ch_len)))
985 {
986 /* Avoid pushing and popping embedding levels if the level run
987 is empty, as this breaks level runs where it shouldn't.
988 UAX#9 removes all the explicit embedding and override codes,
989 so empty embeddings disappear without a trace. We need to
990 behave as if we did the same. */
991 struct bidi_it saved_it;
992 int level = prev_level;
993
994 bidi_copy_it (&saved_it, bidi_it);
995
996 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
997 + bidi_it->ch_len)))
998 {
999 level = bidi_resolve_explicit_1 (bidi_it);
1000 }
1001
1002 if (level == prev_level) /* empty embedding */
1003 saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1004 else /* this embedding is non-empty */
1005 saved_it.ignore_bn_limit = -1;
1006
1007 bidi_copy_it (bidi_it, &saved_it);
1008 if (bidi_it->ignore_bn_limit > 0)
1009 {
1010 /* We pushed a level, but we shouldn't have. Undo that. */
1011 if (!bidi_it->invalid_rl_levels)
1012 {
1013 new_level = bidi_pop_embedding_level (bidi_it);
1014 bidi_it->invalid_rl_levels = -1;
1015 if (bidi_it->invalid_levels)
1016 bidi_it->invalid_levels--;
1017 }
1018 if (!bidi_it->invalid_levels)
1019 new_level = bidi_pop_embedding_level (bidi_it);
1020 else
1021 {
1022 bidi_it->invalid_levels--;
1023 bidi_it->invalid_rl_levels--;
1024 }
1025 }
1026 }
1027
b7b65b15
EZ
1028 if (bidi_it->type == NEUTRAL_B) /* X8 */
1029 {
21fce5ab 1030 bidi_set_paragraph_end (bidi_it);
6bff6497
EZ
1031 /* This is needed by bidi_resolve_weak below, and in L1. */
1032 bidi_it->type_after_w1 = bidi_it->type;
89d3374a 1033 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1034 }
1035
1036 return new_level;
1037}
1038
1039/* Advance in the buffer, resolve weak types and return the type of
1040 the next character after weak type resolution. */
fd3998ff 1041static bidi_type_t
b7b65b15
EZ
1042bidi_resolve_weak (struct bidi_it *bidi_it)
1043{
1044 bidi_type_t type;
1045 bidi_dir_t override;
1046 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1047 int new_level = bidi_resolve_explicit (bidi_it);
1048 int next_char;
1049 bidi_type_t type_of_next;
1050 struct bidi_it saved_it;
1051
1052 type = bidi_it->type;
1053 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1054
1055 if (type == UNKNOWN_BT
1056 || type == LRE
1057 || type == LRO
1058 || type == RLE
1059 || type == RLO
1060 || type == PDF)
1061 abort ();
1062
1063 if (new_level != prev_level
1064 || bidi_it->type == NEUTRAL_B)
1065 {
1066 /* We've got a new embedding level run, compute the directional
1067 type of sor and initialize per-run variables (UAX#9, clause
1068 X10). */
1069 bidi_set_sor_type (bidi_it, prev_level, new_level);
1070 }
1071 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1072 || type == WEAK_BN || type == STRONG_AL)
89d3374a
EZ
1073 bidi_it->type_after_w1 = type; /* needed in L1 */
1074 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1075
1076 /* Level and directional override status are already recorded in
1077 bidi_it, and do not need any change; see X6. */
1078 if (override == R2L) /* X6 */
1079 type = STRONG_R;
1080 else if (override == L2R)
1081 type = STRONG_L;
bc5a45f3 1082 else
b7b65b15 1083 {
bc5a45f3 1084 if (type == WEAK_NSM) /* W1 */
b7b65b15 1085 {
bc5a45f3 1086 /* Note that we don't need to consider the case where the
5930fe97
EZ
1087 prev character has its type overridden by an RLO or LRO,
1088 because then either the type of this NSM would have been
1089 also overridden, or the previous character is outside the
1090 current level run, and thus not relevant to this NSM.
1091 This is why NSM gets the type_after_w1 of the previous
1092 character. */
ebb5722e
EZ
1093 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1094 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1095 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
5930fe97 1096 type = bidi_it->prev.type_after_w1;
bc5a45f3
EZ
1097 else if (bidi_it->sor == R2L)
1098 type = STRONG_R;
1099 else if (bidi_it->sor == L2R)
1100 type = STRONG_L;
1101 else /* shouldn't happen! */
1102 abort ();
b7b65b15 1103 }
bc5a45f3
EZ
1104 if (type == WEAK_EN /* W2 */
1105 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1106 type = WEAK_AN;
1107 else if (type == STRONG_AL) /* W3 */
1108 type = STRONG_R;
1109 else if ((type == WEAK_ES /* W4 */
1110 && bidi_it->prev.type_after_w1 == WEAK_EN
1111 && bidi_it->prev.orig_type == WEAK_EN)
1112 || (type == WEAK_CS
1113 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1114 && bidi_it->prev.orig_type == WEAK_EN)
1115 || bidi_it->prev.type_after_w1 == WEAK_AN)))
b7b65b15 1116 {
e7402cb2
EZ
1117 next_char =
1118 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1119 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
6bff6497 1120 type_of_next = bidi_get_type (next_char, override);
b7b65b15 1121
bc5a45f3 1122 if (type_of_next == WEAK_BN
b7b65b15
EZ
1123 || bidi_explicit_dir_char (next_char))
1124 {
1125 bidi_copy_it (&saved_it, bidi_it);
1126 while (bidi_resolve_explicit (bidi_it) == new_level
bc5a45f3 1127 && bidi_it->type == WEAK_BN)
b7b65b15
EZ
1128 ;
1129 type_of_next = bidi_it->type;
b7b65b15
EZ
1130 bidi_copy_it (bidi_it, &saved_it);
1131 }
bc5a45f3
EZ
1132
1133 /* If the next character is EN, but the last strong-type
1134 character is AL, that next EN will be changed to AN when
1135 we process it in W2 above. So in that case, this ES
1136 should not be changed into EN. */
1137 if (type == WEAK_ES
1138 && type_of_next == WEAK_EN
1139 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1140 type = WEAK_EN;
1141 else if (type == WEAK_CS)
b7b65b15 1142 {
bc5a45f3
EZ
1143 if (bidi_it->prev.type_after_w1 == WEAK_AN
1144 && (type_of_next == WEAK_AN
1145 /* If the next character is EN, but the last
1146 strong-type character is AL, EN will be later
1147 changed to AN when we process it in W2 above.
1148 So in that case, this ES should not be
1149 changed into EN. */
1150 || (type_of_next == WEAK_EN
1151 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1152 type = WEAK_AN;
1153 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1154 && type_of_next == WEAK_EN
1155 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1156 type = WEAK_EN;
1157 }
1158 }
1159 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1160 || type == WEAK_BN) /* W5/Retaining */
1161 {
1162 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1163 || bidi_it->next_en_pos > bidi_it->charpos)
1164 type = WEAK_EN;
1165 else /* W5: ET/BN with EN after it. */
1166 {
1167 EMACS_INT en_pos = bidi_it->charpos + 1;
1168
1169 next_char =
1170 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1171 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1172 type_of_next = bidi_get_type (next_char, override);
1173
1174 if (type_of_next == WEAK_ET
1175 || type_of_next == WEAK_BN
1176 || bidi_explicit_dir_char (next_char))
1177 {
1178 bidi_copy_it (&saved_it, bidi_it);
1179 while (bidi_resolve_explicit (bidi_it) == new_level
1180 && (bidi_it->type == WEAK_BN
1181 || bidi_it->type == WEAK_ET))
1182 ;
1183 type_of_next = bidi_it->type;
1184 en_pos = bidi_it->charpos;
1185 bidi_copy_it (bidi_it, &saved_it);
1186 }
1187 if (type_of_next == WEAK_EN)
b7b65b15 1188 {
bc5a45f3
EZ
1189 /* If the last strong character is AL, the EN we've
1190 found will become AN when we get to it (W2). */
1191 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1192 {
1193 type = WEAK_EN;
1194 /* Remember this EN position, to speed up processing
1195 of the next ETs. */
1196 bidi_it->next_en_pos = en_pos;
1197 }
1198 else if (type == WEAK_BN)
1199 type = NEUTRAL_ON; /* W6/Retaining */
b7b65b15 1200 }
b7b65b15
EZ
1201 }
1202 }
1203 }
1204
1205 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
89d3374a
EZ
1206 || (type == WEAK_BN
1207 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1208 || bidi_it->prev.type_after_w1 == WEAK_ES
1209 || bidi_it->prev.type_after_w1 == WEAK_ET)))
b7b65b15
EZ
1210 type = NEUTRAL_ON;
1211
1212 /* Store the type we've got so far, before we clobber it with strong
1213 types in W7 and while resolving neutral types. But leave alone
1214 the original types that were recorded above, because we will need
1215 them for the L1 clause. */
89d3374a
EZ
1216 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1217 bidi_it->type_after_w1 = type;
1218 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1219
1220 if (type == WEAK_EN) /* W7 */
1221 {
89d3374a 1222 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
b7b65b15
EZ
1223 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1224 type = STRONG_L;
1225 }
1226
1227 bidi_it->type = type;
2d6e4628 1228 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1229 return type;
1230}
1231
fd3998ff 1232static bidi_type_t
b7b65b15
EZ
1233bidi_resolve_neutral (struct bidi_it *bidi_it)
1234{
1235 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1236 bidi_type_t type = bidi_resolve_weak (bidi_it);
1237 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1238
1239 if (!(type == STRONG_R
1240 || type == STRONG_L
1241 || type == WEAK_BN
1242 || type == WEAK_EN
1243 || type == WEAK_AN
1244 || type == NEUTRAL_B
1245 || type == NEUTRAL_S
1246 || type == NEUTRAL_WS
1247 || type == NEUTRAL_ON))
1248 abort ();
1249
1250 if (bidi_get_category (type) == NEUTRAL
1251 || (type == WEAK_BN && prev_level == current_level))
1252 {
1253 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1254 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1255 bidi_it->next_for_neutral.type,
1256 current_level);
1257 else
1258 {
1259 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1260 the assumption of batch-style processing; see clauses W4,
1261 W5, and especially N1, which require to look far forward
1262 (as well as back) in the buffer. May the fleas of a
1263 thousand camels infest the armpits of those who design
1264 supposedly general-purpose algorithms by looking at their
1265 own implementations, and fail to consider other possible
1266 implementations! */
1267 struct bidi_it saved_it;
1268 bidi_type_t next_type;
1269
1270 if (bidi_it->scan_dir == -1)
1271 abort ();
1272
1273 bidi_copy_it (&saved_it, bidi_it);
1274 /* Scan the text forward until we find the first non-neutral
1275 character, and then use that to resolve the neutral we
1276 are dealing with now. We also cache the scanned iterator
1277 states, to salvage some of the effort later. */
1278 bidi_cache_iterator_state (bidi_it, 0);
1279 do {
1280 /* Record the info about the previous character, so that
1281 it will be cached below with this state. */
89d3374a 1282 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1283 && bidi_it->type != WEAK_BN)
1284 bidi_remember_char (&bidi_it->prev, bidi_it);
1285 type = bidi_resolve_weak (bidi_it);
1286 /* Paragraph separators have their levels fully resolved
1287 at this point, so cache them as resolved. */
1288 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1289 /* FIXME: implement L1 here, by testing for a newline and
1290 resetting the level for any sequence of whitespace
1291 characters adjacent to it. */
1292 } while (!(type == NEUTRAL_B
1293 || (type != WEAK_BN
1294 && bidi_get_category (type) != NEUTRAL)
1295 /* This is all per level run, so stop when we
1296 reach the end of this level run. */
1297 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1298 current_level));
1299
1300 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1301
1302 switch (type)
1303 {
1304 case STRONG_L:
1305 case STRONG_R:
1306 case STRONG_AL:
1307 next_type = type;
1308 break;
1309 case WEAK_EN:
1310 case WEAK_AN:
1311 /* N1: ``European and Arabic numbers are treated as
1312 though they were R.'' */
1313 next_type = STRONG_R;
1314 saved_it.next_for_neutral.type = STRONG_R;
1315 break;
1316 case WEAK_BN:
1317 if (!bidi_explicit_dir_char (bidi_it->ch))
1318 abort (); /* can't happen: BNs are skipped */
1319 /* FALLTHROUGH */
1320 case NEUTRAL_B:
1321 /* Marched all the way to the end of this level run.
1322 We need to use the eor type, whose information is
1323 stored by bidi_set_sor_type in the prev_for_neutral
1324 member. */
1325 if (saved_it.type != WEAK_BN
89d3374a 1326 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
b7b65b15
EZ
1327 {
1328 next_type = bidi_it->prev_for_neutral.type;
1329 saved_it.next_for_neutral.type = next_type;
2d6e4628 1330 bidi_check_type (next_type);
b7b65b15
EZ
1331 }
1332 else
1333 {
1334 /* This is a BN which does not adjoin neutrals.
1335 Leave its type alone. */
1336 bidi_copy_it (bidi_it, &saved_it);
1337 return bidi_it->type;
1338 }
1339 break;
1340 default:
1341 abort ();
1342 }
1343 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1344 next_type, current_level);
1345 saved_it.type = type;
2d6e4628 1346 bidi_check_type (type);
b7b65b15
EZ
1347 bidi_copy_it (bidi_it, &saved_it);
1348 }
1349 }
1350 return type;
1351}
1352
1353/* Given an iterator state in BIDI_IT, advance one character position
1354 in the buffer to the next character (in the logical order), resolve
1355 the bidi type of that next character, and return that type. */
fd3998ff 1356static bidi_type_t
b7b65b15
EZ
1357bidi_type_of_next_char (struct bidi_it *bidi_it)
1358{
1359 bidi_type_t type;
1360
1361 /* This should always be called during a forward scan. */
1362 if (bidi_it->scan_dir != 1)
1363 abort ();
1364
1365 /* Reset the limit until which to ignore BNs if we step out of the
1366 area where we found only empty levels. */
1367 if ((bidi_it->ignore_bn_limit > 0
1368 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1369 || (bidi_it->ignore_bn_limit == -1
1370 && !bidi_explicit_dir_char (bidi_it->ch)))
1371 bidi_it->ignore_bn_limit = 0;
1372
1373 type = bidi_resolve_neutral (bidi_it);
1374
1375 return type;
1376}
1377
1378/* Given an iterator state BIDI_IT, advance one character position in
1379 the buffer to the next character (in the logical order), resolve
1380 the embedding and implicit levels of that next character, and
1381 return the resulting level. */
fd3998ff 1382static int
b7b65b15
EZ
1383bidi_level_of_next_char (struct bidi_it *bidi_it)
1384{
1385 bidi_type_t type;
1386 int level, prev_level = -1;
1387 struct bidi_saved_info next_for_neutral;
1388
1389 if (bidi_it->scan_dir == 1)
1390 {
1391 /* There's no sense in trying to advance if we hit end of text. */
1502b819 1392 if (bidi_it->bytepos >= ZV_BYTE)
b7b65b15
EZ
1393 return bidi_it->resolved_level;
1394
1395 /* Record the info about the previous character. */
89d3374a 1396 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1397 && bidi_it->type != WEAK_BN)
1398 bidi_remember_char (&bidi_it->prev, bidi_it);
89d3374a
EZ
1399 if (bidi_it->type_after_w1 == STRONG_R
1400 || bidi_it->type_after_w1 == STRONG_L
1401 || bidi_it->type_after_w1 == STRONG_AL)
b7b65b15
EZ
1402 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1403 /* FIXME: it sounds like we don't need both prev and
1404 prev_for_neutral members, but I'm leaving them both for now. */
1405 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1406 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1407 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1408
1409 /* If we overstepped the characters used for resolving neutrals
1410 and whitespace, invalidate their info in the iterator. */
1411 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1412 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1413 if (bidi_it->next_en_pos >= 0
1414 && bidi_it->charpos >= bidi_it->next_en_pos)
1415 bidi_it->next_en_pos = -1;
1416 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1417 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1418 bidi_it->next_for_ws.type = UNKNOWN_BT;
1419
1420 /* This must be taken before we fill the iterator with the info
1421 about the next char. If we scan backwards, the iterator
1422 state must be already cached, so there's no need to know the
1423 embedding level of the previous character, since we will be
1424 returning to our caller shortly. */
1425 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1426 }
1427 next_for_neutral = bidi_it->next_for_neutral;
1428
1429 /* Perhaps it is already cached. */
1430 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1431 if (type != UNKNOWN_BT)
1432 {
1433 /* Don't lose the information for resolving neutrals! The
1434 cached states could have been cached before their
1435 next_for_neutral member was computed. If we are on our way
1436 forward, we can simply take the info from the previous
1437 state. */
1438 if (bidi_it->scan_dir == 1
1439 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1440 bidi_it->next_for_neutral = next_for_neutral;
1441
1442 /* If resolved_level is -1, it means this state was cached
1443 before it was completely resolved, so we cannot return
1444 it. */
1445 if (bidi_it->resolved_level != -1)
1446 return bidi_it->resolved_level;
1447 }
1448 if (bidi_it->scan_dir == -1)
1449 /* If we are going backwards, the iterator state is already cached
1450 from previous scans, and should be fully resolved. */
1451 abort ();
1452
1453 if (type == UNKNOWN_BT)
1454 type = bidi_type_of_next_char (bidi_it);
1455
1456 if (type == NEUTRAL_B)
1457 return bidi_it->resolved_level;
1458
1459 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1460 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1461 || (type == WEAK_BN && prev_level == level))
1462 {
1463 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1464 abort ();
1465
1466 /* If the cached state shows a neutral character, it was not
1467 resolved by bidi_resolve_neutral, so do it now. */
1468 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1469 bidi_it->next_for_neutral.type,
1470 level);
1471 }
1472
1473 if (!(type == STRONG_R
1474 || type == STRONG_L
1475 || type == WEAK_BN
1476 || type == WEAK_EN
1477 || type == WEAK_AN))
1478 abort ();
1479 bidi_it->type = type;
2d6e4628 1480 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1481
1482 /* For L1 below, we need to know, for each WS character, whether
1483 it belongs to a sequence of WS characters preceeding a newline
1484 or a TAB or a paragraph separator. */
89d3374a 1485 if (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
1486 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1487 {
1488 int ch;
1489 int clen = bidi_it->ch_len;
6bff6497
EZ
1490 EMACS_INT bpos = bidi_it->bytepos;
1491 EMACS_INT cpos = bidi_it->charpos;
b7b65b15
EZ
1492 bidi_type_t chtype;
1493
1494 do {
1495 /*_fetch_multibyte_char_len = 1;*/
e7402cb2 1496 ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
b7b65b15
EZ
1497 bpos += clen;
1498 cpos++;
e7402cb2
EZ
1499 clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1500 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
b7b65b15
EZ
1501 chtype = NEUTRAL_B;
1502 else
6bff6497 1503 chtype = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15
EZ
1504 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1505 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1506 bidi_it->next_for_ws.type = chtype;
2d6e4628 1507 bidi_check_type (bidi_it->next_for_ws.type);
b7b65b15
EZ
1508 bidi_it->next_for_ws.charpos = cpos;
1509 bidi_it->next_for_ws.bytepos = bpos;
1510 }
1511
1512 /* Resolve implicit levels, with a twist: PDFs get the embedding
1513 level of the enbedding they terminate. See below for the
1514 reason. */
89d3374a 1515 if (bidi_it->orig_type == PDF
b7b65b15
EZ
1516 /* Don't do this if this formatting code didn't change the
1517 embedding level due to invalid or empty embeddings. */
1518 && prev_level != level)
1519 {
1520 /* Don't look in UAX#9 for the reason for this: it's our own
1521 private quirk. The reason is that we want the formatting
1522 codes to be delivered so that they bracket the text of their
1523 embedding. For example, given the text
1524
1525 {RLO}teST{PDF}
1526
1527 we want it to be displayed as
1528
1529 {RLO}STet{PDF}
1530
1531 not as
1532
1533 STet{RLO}{PDF}
1534
1535 which will result because we bump up the embedding level as
1536 soon as we see the RLO and pop it as soon as we see the PDF,
1537 so RLO itself has the same embedding level as "teST", and
1538 thus would be normally delivered last, just before the PDF.
1539 The switch below fiddles with the level of PDF so that this
1540 ugly side effect does not happen.
1541
1542 (This is, of course, only important if the formatting codes
e7402cb2
EZ
1543 are actually displayed, but Emacs does need to display them
1544 if the user wants to.) */
b7b65b15
EZ
1545 level = prev_level;
1546 }
89d3374a
EZ
1547 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1548 || bidi_it->orig_type == NEUTRAL_S
e7402cb2
EZ
1549 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1550 /* || bidi_it->ch == LINESEP_CHAR */
89d3374a 1551 || (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
1552 && (bidi_it->next_for_ws.type == NEUTRAL_B
1553 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1554 level = bidi_it->level_stack[0].level;
1555 else if ((level & 1) == 0) /* I1 */
1556 {
1557 if (type == STRONG_R)
1558 level++;
1559 else if (type == WEAK_EN || type == WEAK_AN)
1560 level += 2;
1561 }
1562 else /* I2 */
1563 {
1564 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1565 level++;
1566 }
1567
1568 bidi_it->resolved_level = level;
1569 return level;
1570}
1571
1572/* Move to the other edge of a level given by LEVEL. If END_FLAG is
1573 non-zero, we are at the end of a level, and we need to prepare to
1574 resume the scan of the lower level.
1575
1576 If this level's other edge is cached, we simply jump to it, filling
1577 the iterator structure with the iterator state on the other edge.
1578 Otherwise, we walk the buffer until we come back to the same level
1579 as LEVEL.
1580
1581 Note: we are not talking here about a ``level run'' in the UAX#9
1582 sense of the term, but rather about a ``level'' which includes
1583 all the levels higher than it. In other words, given the levels
1584 like this:
1585
1586 11111112222222333333334443343222222111111112223322111
1587 A B C
1588
1589 and assuming we are at point A scanning left to right, this
1590 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1591 at point B. */
1592static void
1593bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1594{
1595 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1596 int idx;
1597
1598 /* Try the cache first. */
1599 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1600 bidi_cache_fetch_state (idx, bidi_it);
1601 else
1602 {
1603 int new_level;
1604
1605 if (end_flag)
1606 abort (); /* if we are at end of level, its edges must be cached */
1607
1608 bidi_cache_iterator_state (bidi_it, 1);
1609 do {
1610 new_level = bidi_level_of_next_char (bidi_it);
1611 bidi_cache_iterator_state (bidi_it, 1);
1612 } while (new_level >= level);
1613 }
1614}
1615
1616void
4b292a22 1617bidi_move_to_visually_next (struct bidi_it *bidi_it)
b7b65b15
EZ
1618{
1619 int old_level, new_level, next_level;
9c82e145 1620 struct bidi_it sentinel;
b7b65b15
EZ
1621
1622 if (bidi_it->scan_dir == 0)
1623 {
1624 bidi_it->scan_dir = 1; /* default to logical order */
1625 }
1626
be39f003
EZ
1627 /* If we just passed a newline, initialize for the next line. */
1628 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1629 bidi_line_init (bidi_it);
1630
6dcfd253
EZ
1631 /* Prepare the sentinel iterator state, and cache it. When we bump
1632 into it, scanning backwards, we'll know that the last non-base
1633 level is exhausted. */
b7b65b15 1634 if (bidi_cache_idx == 0)
9c82e145
EZ
1635 {
1636 bidi_copy_it (&sentinel, bidi_it);
1637 if (bidi_it->first_elt)
1638 {
1639 sentinel.charpos--; /* cached charpos needs to be monotonic */
1640 sentinel.bytepos--;
1641 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1642 sentinel.ch_len = 1;
1643 }
6dcfd253 1644 bidi_cache_iterator_state (&sentinel, 1);
9c82e145 1645 }
b7b65b15
EZ
1646
1647 old_level = bidi_it->resolved_level;
1648 new_level = bidi_level_of_next_char (bidi_it);
b7b65b15
EZ
1649
1650 /* Reordering of resolved levels (clause L2) is implemented by
1651 jumping to the other edge of the level and flipping direction of
c0546589 1652 scanning the text whenever we find a level change. */
b7b65b15
EZ
1653 if (new_level != old_level)
1654 {
1655 int ascending = new_level > old_level;
1656 int level_to_search = ascending ? old_level + 1 : old_level;
1657 int incr = ascending ? 1 : -1;
1658 int expected_next_level = old_level + incr;
1659
b7b65b15
EZ
1660 /* Jump (or walk) to the other edge of this level. */
1661 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1662 /* Switch scan direction and peek at the next character in the
1663 new direction. */
1664 bidi_it->scan_dir = -bidi_it->scan_dir;
1665
1666 /* The following loop handles the case where the resolved level
1667 jumps by more than one. This is typical for numbers inside a
1668 run of text with left-to-right embedding direction, but can
1669 also happen in other situations. In those cases the decision
1670 where to continue after a level change, and in what direction,
1671 is tricky. For example, given a text like below:
1672
1673 abcdefgh
1674 11336622
1675
1676 (where the numbers below the text show the resolved levels),
1677 the result of reordering according to UAX#9 should be this:
1678
1679 efdcghba
1680
1681 This is implemented by the loop below which flips direction
1682 and jumps to the other edge of the level each time it finds
1683 the new level not to be the expected one. The expected level
1684 is always one more or one less than the previous one. */
1685 next_level = bidi_peek_at_next_level (bidi_it);
1686 while (next_level != expected_next_level)
1687 {
1688 expected_next_level += incr;
1689 level_to_search += incr;
1690 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1691 bidi_it->scan_dir = -bidi_it->scan_dir;
1692 next_level = bidi_peek_at_next_level (bidi_it);
1693 }
1694
1695 /* Finally, deliver the next character in the new direction. */
1696 next_level = bidi_level_of_next_char (bidi_it);
1697 }
1698
b44d9321
EZ
1699 /* Take note when we have just processed the newline that precedes
1700 the end of the paragraph. The next time we are about to be
1701 called, set_iterator_to_next will automatically reinit the
1702 paragraph direction, if needed. We do this at the newline before
1703 the paragraph separator, because the next character might not be
1704 the first character of the next paragraph, due to the bidi
c0546589
EZ
1705 reordering, whereas we _must_ know the paragraph base direction
1706 _before_ we process the paragraph's text, since the base
1707 direction affects the reordering. */
6bff6497 1708 if (bidi_it->scan_dir == 1
be39f003
EZ
1709 && bidi_it->orig_type == NEUTRAL_B
1710 && bidi_it->bytepos < ZV_BYTE)
1711 {
1712 EMACS_INT sep_len =
1713 bidi_at_paragraph_end (bidi_it->charpos + 1,
1714 bidi_it->bytepos + bidi_it->ch_len);
1715 if (sep_len >= 0)
1716 {
1717 bidi_it->new_paragraph = 1;
b44d9321
EZ
1718 /* Record the buffer position of the last character of the
1719 paragraph separator. */
1720 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
be39f003
EZ
1721 }
1722 }
6bff6497 1723
b7b65b15
EZ
1724 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1725 {
1726 /* If we are at paragraph's base embedding level and beyond the
1727 last cached position, the cache's job is done and we can
1728 discard it. */
1729 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1730 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1731 bidi_cache_reset ();
1732 /* But as long as we are caching during forward scan, we must
1733 cache each state, or else the cache integrity will be
1734 compromised: it assumes cached states correspond to buffer
1735 positions 1:1. */
1736 else
1737 bidi_cache_iterator_state (bidi_it, 1);
1738 }
1739}
1740
1741/* This is meant to be called from within the debugger, whenever you
1742 wish to examine the cache contents. */
1743void
1744bidi_dump_cached_states (void)
1745{
1746 int i;
1747 int ndigits = 1;
1748
1749 if (bidi_cache_idx == 0)
1750 {
1751 fprintf (stderr, "The cache is empty.\n");
1752 return;
1753 }
1754 fprintf (stderr, "Total of %d state%s in cache:\n",
1755 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1756
1757 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1758 ndigits++;
1759 fputs ("ch ", stderr);
1760 for (i = 0; i < bidi_cache_idx; i++)
1761 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1762 fputs ("\n", stderr);
1763 fputs ("lvl ", stderr);
1764 for (i = 0; i < bidi_cache_idx; i++)
1765 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1766 fputs ("\n", stderr);
1767 fputs ("pos ", stderr);
1768 for (i = 0; i < bidi_cache_idx; i++)
1769 fprintf (stderr, "%*d", ndigits, bidi_cache[i].charpos);
1770 fputs ("\n", stderr);
1771}