[ChangeLog]
[bpt/emacs.git] / src / bidi.c
CommitLineData
b7b65b15 1/* Low-level bidirectional buffer-scanning functions for GNU Emacs.
73b0cd50 2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
b118e65f 3 Free Software Foundation, Inc.
b7b65b15
EZ
4
5This file is part of GNU Emacs.
6
a8d11bd3 7GNU Emacs is free software: you can redistribute it and/or modify
b7b65b15 8it under the terms of the GNU General Public License as published by
a8d11bd3
EZ
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
b7b65b15
EZ
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
b7b65b15 17You should have received a copy of the GNU General Public License
a8d11bd3 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
b7b65b15 19
2d6e4628
EZ
20/* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
b7b65b15
EZ
23 as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the reference and most other implementations, this one is
940afb59
EZ
26 designed to be called once for every character in the buffer or
27 string.
b7b65b15 28
4b292a22 29 The main entry point is bidi_move_to_visually_next. Each time it
b7b65b15
EZ
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
4b292a22
EZ
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
b7b65b15
EZ
36 character by resolving their levels on the fly.
37
940afb59
EZ
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
89d3374a
EZ
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
46
b7b65b15
EZ
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
51
b7b65b15 52#include <config.h>
b7b65b15 53#include <stdio.h>
29e3d8d1
EZ
54#include <setjmp.h>
55
b7b65b15
EZ
56#include "lisp.h"
57#include "buffer.h"
58#include "character.h"
59#include "dispextern.h"
60
61static int bidi_initialized = 0;
62
cbc4fd20 63static Lisp_Object bidi_type_table, bidi_mirror_table;
b7b65b15 64
e7402cb2 65/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
b7b65b15
EZ
66#define LRM_CHAR 0x200E
67#define RLM_CHAR 0x200F
68#define LRE_CHAR 0x202A
69#define RLE_CHAR 0x202B
70#define PDF_CHAR 0x202C
71#define LRO_CHAR 0x202D
72#define RLO_CHAR 0x202E
73
b7b65b15 74#define BIDI_EOB -1
b7b65b15 75
b7b65b15
EZ
76/* Local data structures. (Look in dispextern.h for the rest.) */
77
78/* What we need to know about the current paragraph. */
79struct bidi_paragraph_info {
61bfec98
EZ
80 EMACS_INT start_bytepos; /* byte position where it begins */
81 EMACS_INT end_bytepos; /* byte position where it ends */
82 int embedding_level; /* its basic embedding level */
83 bidi_dir_t base_dir; /* its base direction */
b7b65b15
EZ
84};
85
86/* Data type for describing the bidirectional character categories. */
87typedef enum {
88 UNKNOWN_BC,
89 NEUTRAL,
90 WEAK,
91 STRONG
92} bidi_category_t;
93
e78aecca 94extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
b7b65b15
EZ
95int bidi_ignore_explicit_marks_for_paragraph_level = 1;
96
372b7a95 97static Lisp_Object paragraph_start_re, paragraph_separate_re;
6bff6497 98static Lisp_Object Qparagraph_start, Qparagraph_separate;
b7b65b15 99
b7b65b15 100static void
971de7fb 101bidi_initialize (void)
b7b65b15 102{
317fbf33
EZ
103
104#include "biditype.h"
cbc4fd20 105#include "bidimirror.h"
317fbf33 106
b7b65b15
EZ
107 int i;
108
109 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
2d6e4628 110 staticpro (&bidi_type_table);
b7b65b15
EZ
111
112 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
317fbf33 113 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
b7b65b15 114 make_number (bidi_type[i].type));
6bff6497 115
cbc4fd20
EZ
116 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
117 staticpro (&bidi_mirror_table);
118
119 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
120 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
121 make_number (bidi_mirror[i].to));
122
b4bf28b7
SM
123 Qparagraph_start = intern ("paragraph-start");
124 staticpro (&Qparagraph_start);
372b7a95
EZ
125 paragraph_start_re = Fsymbol_value (Qparagraph_start);
126 if (!STRINGP (paragraph_start_re))
127 paragraph_start_re = build_string ("\f\\|[ \t]*$");
128 staticpro (&paragraph_start_re);
b4bf28b7
SM
129 Qparagraph_separate = intern ("paragraph-separate");
130 staticpro (&Qparagraph_separate);
372b7a95
EZ
131 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
132 if (!STRINGP (paragraph_separate_re))
133 paragraph_separate_re = build_string ("[ \t\f]*$");
134 staticpro (&paragraph_separate_re);
b7b65b15
EZ
135 bidi_initialized = 1;
136}
137
6bff6497
EZ
138/* Return the bidi type of a character CH, subject to the current
139 directional OVERRIDE. */
55d4c1b2 140static inline bidi_type_t
6bff6497 141bidi_get_type (int ch, bidi_dir_t override)
b7b65b15 142{
6bff6497
EZ
143 bidi_type_t default_type;
144
e7402cb2
EZ
145 if (ch == BIDI_EOB)
146 return NEUTRAL_B;
e342a24d
EZ
147 if (ch < 0 || ch > MAX_CHAR)
148 abort ();
6bff6497
EZ
149
150 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
151
152 if (override == NEUTRAL_DIR)
153 return default_type;
154
155 switch (default_type)
156 {
157 /* Although UAX#9 does not tell, it doesn't make sense to
158 override NEUTRAL_B and LRM/RLM characters. */
159 case NEUTRAL_B:
160 case LRE:
161 case LRO:
162 case RLE:
163 case RLO:
164 case PDF:
165 return default_type;
166 default:
167 switch (ch)
168 {
169 case LRM_CHAR:
170 case RLM_CHAR:
171 return default_type;
172 default:
173 if (override == L2R) /* X6 */
174 return STRONG_L;
175 else if (override == R2L)
176 return STRONG_R;
177 else
178 abort (); /* can't happen: handled above */
179 }
180 }
b7b65b15
EZ
181}
182
7d3b3862 183static void
2d6e4628
EZ
184bidi_check_type (bidi_type_t type)
185{
186 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
187 abort ();
188}
189
b7b65b15 190/* Given a bidi TYPE of a character, return its category. */
55d4c1b2 191static inline bidi_category_t
b7b65b15
EZ
192bidi_get_category (bidi_type_t type)
193{
194 switch (type)
195 {
196 case UNKNOWN_BT:
197 return UNKNOWN_BC;
198 case STRONG_L:
199 case STRONG_R:
200 case STRONG_AL:
201 case LRE:
202 case LRO:
203 case RLE:
204 case RLO:
205 return STRONG;
206 case PDF: /* ??? really?? */
207 case WEAK_EN:
208 case WEAK_ES:
209 case WEAK_ET:
210 case WEAK_AN:
211 case WEAK_CS:
212 case WEAK_NSM:
213 case WEAK_BN:
214 return WEAK;
215 case NEUTRAL_B:
216 case NEUTRAL_S:
217 case NEUTRAL_WS:
218 case NEUTRAL_ON:
219 return NEUTRAL;
220 default:
221 abort ();
222 }
223}
224
a6e8d97c
EZ
225/* Return the mirrored character of C, if it has one. If C has no
226 mirrored counterpart, return C.
227 Note: The conditions in UAX#9 clause L4 regarding the surrounding
228 context must be tested by the caller. */
b7b65b15
EZ
229int
230bidi_mirror_char (int c)
231{
cbc4fd20
EZ
232 Lisp_Object val;
233
234 if (c == BIDI_EOB)
235 return c;
236 if (c < 0 || c > MAX_CHAR)
237 abort ();
b7b65b15 238
cbc4fd20
EZ
239 val = CHAR_TABLE_REF (bidi_mirror_table, c);
240 if (INTEGERP (val))
b7b65b15 241 {
cbc4fd20 242 int v = XINT (val);
b7b65b15 243
cbc4fd20
EZ
244 if (v < 0 || v > MAX_CHAR)
245 abort ();
246
247 return v;
b7b65b15 248 }
cbc4fd20 249
b7b65b15
EZ
250 return c;
251}
252
253/* Copy the bidi iterator from FROM to TO. To save cycles, this only
254 copies the part of the level stack that is actually in use. */
55d4c1b2 255static inline void
b7b65b15
EZ
256bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
257{
258 int i;
259
e69a9370
EZ
260 /* Copy everything except the level stack and beyond. */
261 memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
b7b65b15
EZ
262
263 /* Copy the active part of the level stack. */
264 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
265 for (i = 1; i <= from->stack_idx; i++)
266 to->level_stack[i] = from->level_stack[i];
267}
268
269/* Caching the bidi iterator states. */
270
2fe72643
EZ
271#define BIDI_CACHE_CHUNK 200
272static struct bidi_it *bidi_cache;
273static size_t bidi_cache_size = 0;
0416466c 274static size_t elsz = sizeof (struct bidi_it);
2fe72643
EZ
275static int bidi_cache_idx; /* next unused cache slot */
276static int bidi_cache_last_idx; /* slot of last cache hit */
b7b65b15 277
55d4c1b2 278static inline void
b7b65b15
EZ
279bidi_cache_reset (void)
280{
281 bidi_cache_idx = 0;
282 bidi_cache_last_idx = -1;
283}
284
55d4c1b2 285static inline void
2fe72643
EZ
286bidi_cache_shrink (void)
287{
288 if (bidi_cache_size > BIDI_CACHE_CHUNK)
289 {
0416466c
EZ
290 bidi_cache_size = BIDI_CACHE_CHUNK;
291 bidi_cache =
292 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643
EZ
293 }
294 bidi_cache_reset ();
295}
296
55d4c1b2 297static inline void
b7b65b15
EZ
298bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
299{
300 int current_scan_dir = bidi_it->scan_dir;
301
302 if (idx < 0 || idx >= bidi_cache_idx)
303 abort ();
304
305 bidi_copy_it (bidi_it, &bidi_cache[idx]);
306 bidi_it->scan_dir = current_scan_dir;
307 bidi_cache_last_idx = idx;
308}
309
310/* Find a cached state with a given CHARPOS and resolved embedding
311 level less or equal to LEVEL. if LEVEL is -1, disregard the
312 resolved levels in cached states. DIR, if non-zero, means search
313 in that direction from the last cache hit. */
55d4c1b2 314static inline int
61bfec98 315bidi_cache_search (EMACS_INT charpos, int level, int dir)
b7b65b15
EZ
316{
317 int i, i_start;
318
319 if (bidi_cache_idx)
320 {
321 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
322 dir = -1;
323 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
324 dir = 1;
325 if (dir)
326 i_start = bidi_cache_last_idx;
327 else
328 {
329 dir = -1;
330 i_start = bidi_cache_idx - 1;
331 }
332
333 if (dir < 0)
334 {
335 /* Linear search for now; FIXME! */
336 for (i = i_start; i >= 0; i--)
337 if (bidi_cache[i].charpos == charpos
338 && (level == -1 || bidi_cache[i].resolved_level <= level))
339 return i;
340 }
341 else
342 {
343 for (i = i_start; i < bidi_cache_idx; i++)
344 if (bidi_cache[i].charpos == charpos
345 && (level == -1 || bidi_cache[i].resolved_level <= level))
346 return i;
347 }
348 }
349
350 return -1;
351}
352
353/* Find a cached state where the resolved level changes to a value
354 that is lower than LEVEL, and return its cache slot index. DIR is
355 the direction to search, starting with the last used cache slot.
356 BEFORE, if non-zero, means return the index of the slot that is
357 ``before'' the level change in the search direction. That is,
358 given the cached levels like this:
359
360 1122333442211
361 AB C
362
363 and assuming we are at the position cached at the slot marked with
364 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
365 index of slot B or A, depending whether BEFORE is, respectively,
366 non-zero or zero. */
367static int
368bidi_cache_find_level_change (int level, int dir, int before)
369{
370 if (bidi_cache_idx)
371 {
372 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
373 int incr = before ? 1 : 0;
374
375 if (!dir)
376 dir = -1;
377 else if (!incr)
378 i += dir;
379
380 if (dir < 0)
381 {
382 while (i >= incr)
383 {
384 if (bidi_cache[i - incr].resolved_level >= 0
385 && bidi_cache[i - incr].resolved_level < level)
386 return i;
387 i--;
388 }
389 }
390 else
391 {
392 while (i < bidi_cache_idx - incr)
393 {
394 if (bidi_cache[i + incr].resolved_level >= 0
395 && bidi_cache[i + incr].resolved_level < level)
396 return i;
397 i++;
398 }
399 }
400 }
401
402 return -1;
403}
404
55d4c1b2 405static inline void
b7b65b15
EZ
406bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
407{
408 int idx;
409
410 /* We should never cache on backward scans. */
411 if (bidi_it->scan_dir == -1)
412 abort ();
413 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
414
415 if (idx < 0)
416 {
417 idx = bidi_cache_idx;
2fe72643
EZ
418 /* Enlarge the cache as needed. */
419 if (idx >= bidi_cache_size)
420 {
0416466c 421 bidi_cache_size += BIDI_CACHE_CHUNK;
2fe72643 422 bidi_cache =
0416466c 423 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643 424 }
bd924a5d
EZ
425 /* Character positions should correspond to cache positions 1:1.
426 If we are outside the range of cached positions, the cache is
427 useless and must be reset. */
428 if (idx > 0 &&
429 (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
430 || bidi_it->charpos < bidi_cache[0].charpos))
431 {
432 bidi_cache_reset ();
433 idx = 0;
434 }
b7b65b15
EZ
435 bidi_copy_it (&bidi_cache[idx], bidi_it);
436 if (!resolved)
437 bidi_cache[idx].resolved_level = -1;
438 }
439 else
440 {
441 /* Copy only the members which could have changed, to avoid
442 costly copying of the entire struct. */
443 bidi_cache[idx].type = bidi_it->type;
2d6e4628 444 bidi_check_type (bidi_it->type);
89d3374a
EZ
445 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
446 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
447 if (resolved)
448 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
449 else
450 bidi_cache[idx].resolved_level = -1;
451 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
452 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
453 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
454 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
455 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
456 }
457
458 bidi_cache_last_idx = idx;
459 if (idx >= bidi_cache_idx)
460 bidi_cache_idx = idx + 1;
461}
462
55d4c1b2 463static inline bidi_type_t
61bfec98 464bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
b7b65b15
EZ
465{
466 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
467
468 if (i >= 0)
469 {
470 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
471
5009f85e 472 bidi_copy_it (bidi_it, &bidi_cache[i]);
b7b65b15
EZ
473 bidi_cache_last_idx = i;
474 /* Don't let scan direction from from the cached state override
475 the current scan direction. */
476 bidi_it->scan_dir = current_scan_dir;
477 return bidi_it->type;
478 }
479
480 return UNKNOWN_BT;
481}
482
55d4c1b2 483static inline int
b7b65b15
EZ
484bidi_peek_at_next_level (struct bidi_it *bidi_it)
485{
486 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
487 abort ();
488 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
489}
490
be39f003
EZ
491/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
492 Value is the non-negative length of the paragraph separator
493 following the buffer position, -1 if position is at the beginning
494 of a new paragraph, or -2 if position is neither at beginning nor
495 at end of a paragraph. */
fd3998ff 496static EMACS_INT
6bff6497 497bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
b7b65b15 498{
372b7a95
EZ
499 Lisp_Object sep_re;
500 Lisp_Object start_re;
be39f003
EZ
501 EMACS_INT val;
502
372b7a95
EZ
503 sep_re = paragraph_separate_re;
504 start_re = paragraph_start_re;
be39f003
EZ
505
506 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
507 if (val < 0)
508 {
509 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
510 val = -1;
511 else
512 val = -2;
513 }
b7b65b15 514
be39f003 515 return val;
b7b65b15
EZ
516}
517
518/* Determine the start-of-run (sor) directional type given the two
519 embedding levels on either side of the run boundary. Also, update
520 the saved info about previously seen characters, since that info is
521 generally valid for a single level run. */
55d4c1b2 522static inline void
b7b65b15
EZ
523bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
524{
525 int higher_level = level_before > level_after ? level_before : level_after;
526
527 /* The prev_was_pdf gork is required for when we have several PDFs
528 in a row. In that case, we want to compute the sor type for the
529 next level run only once: when we see the first PDF. That's
530 because the sor type depends only on the higher of the two levels
531 that we find on the two sides of the level boundary (see UAX#9,
532 clause X10), and so we don't need to know the final embedding
533 level to which we descend after processing all the PDFs. */
e342a24d 534 if (!bidi_it->prev_was_pdf || level_before < level_after)
b7b65b15
EZ
535 /* FIXME: should the default sor direction be user selectable? */
536 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
537 if (level_before > level_after)
538 bidi_it->prev_was_pdf = 1;
539
540 bidi_it->prev.type = UNKNOWN_BT;
89d3374a
EZ
541 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
542 bidi_it->last_strong.orig_type = UNKNOWN_BT;
b7b65b15
EZ
543 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
544 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
545 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
89d3374a
EZ
546 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
547 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15
EZ
548 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
549}
550
6bff6497 551static void
be39f003
EZ
552bidi_line_init (struct bidi_it *bidi_it)
553{
554 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
555 bidi_it->resolved_level = bidi_it->level_stack[0].level;
556 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
557 bidi_it->invalid_levels = 0;
558 bidi_it->invalid_rl_levels = -1;
559 bidi_it->next_en_pos = -1;
560 bidi_it->next_for_ws.type = UNKNOWN_BT;
b44d9321
EZ
561 bidi_set_sor_type (bidi_it,
562 bidi_it->paragraph_dir == R2L ? 1 : 0,
be39f003
EZ
563 bidi_it->level_stack[0].level); /* X10 */
564
565 bidi_cache_reset ();
566}
567
568/* Find the beginning of this paragraph by looking back in the buffer.
569 Value is the byte position of the paragraph's beginning. */
570static EMACS_INT
b44d9321 571bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
6bff6497 572{
372b7a95 573 Lisp_Object re = paragraph_start_re;
6bff6497
EZ
574 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
575
6bff6497
EZ
576 while (pos_byte > BEGV_BYTE
577 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
578 {
be39f003
EZ
579 pos = find_next_newline_no_quit (pos - 1, -1);
580 pos_byte = CHAR_TO_BYTE (pos);
6bff6497 581 }
be39f003 582 return pos_byte;
6bff6497
EZ
583}
584
bea4f10c 585/* Determine the base direction, a.k.a. base embedding level, of the
b44d9321
EZ
586 paragraph we are about to iterate through. If DIR is either L2R or
587 R2L, just use that. Otherwise, determine the paragraph direction
bea4f10c
EZ
588 from the first strong directional character of the paragraph.
589
590 NO_DEFAULT_P non-nil means don't default to L2R if the paragraph
591 has no strong directional characters and both DIR and
592 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
593 in the buffer until a paragraph is found with a strong character,
594 or until hitting BEGV. In the latter case, fall back to L2R. This
595 flag is used in current-bidi-paragraph-direction.
596
597 Note that this function gives the paragraph separator the same
598 direction as the preceding paragraph, even though Emacs generally
599 views the separartor as not belonging to any paragraph. */
b7b65b15 600void
bea4f10c 601bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
b7b65b15 602{
6bff6497 603 EMACS_INT bytepos = bidi_it->bytepos;
bea4f10c 604 EMACS_INT pstartbyte;
e7402cb2 605
5e65aec0
EZ
606 /* Special case for an empty buffer. */
607 if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
608 dir = L2R;
9c82e145 609 /* We should never be called at EOB or before BEGV. */
5e65aec0 610 else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
9c82e145
EZ
611 abort ();
612
be39f003
EZ
613 if (dir == L2R)
614 {
615 bidi_it->paragraph_dir = L2R;
616 bidi_it->new_paragraph = 0;
617 }
618 else if (dir == R2L)
619 {
620 bidi_it->paragraph_dir = R2L;
621 bidi_it->new_paragraph = 0;
622 }
b7b65b15
EZ
623 else if (dir == NEUTRAL_DIR) /* P2 */
624 {
6bff6497
EZ
625 int ch, ch_len;
626 EMACS_INT pos;
627 bidi_type_t type;
be39f003 628
d20e1419
EZ
629 if (!bidi_initialized)
630 bidi_initialize ();
631
be39f003
EZ
632 /* If we are inside a paragraph separator, we are just waiting
633 for the separator to be exhausted; use the previous paragraph
e5a2fec7
EZ
634 direction. But don't do that if we have been just reseated,
635 because we need to reinitialize below in that case. */
636 if (!bidi_it->first_elt
637 && bidi_it->charpos < bidi_it->separator_limit)
be39f003
EZ
638 return;
639
b44d9321 640 /* If we are on a newline, get past it to where the next
5e65aec0
EZ
641 paragraph might start. But don't do that at BEGV since then
642 we are potentially in a new paragraph that doesn't yet
643 exist. */
c143c213 644 pos = bidi_it->charpos;
5e65aec0 645 if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
be39f003 646 {
b44d9321 647 bytepos++;
c143c213 648 pos++;
be39f003 649 }
b44d9321
EZ
650
651 /* We are either at the beginning of a paragraph or in the
652 middle of it. Find where this paragraph starts. */
bea4f10c 653 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
be39f003
EZ
654 bidi_it->separator_limit = -1;
655 bidi_it->new_paragraph = 0;
bea4f10c
EZ
656
657 /* The following loop is run more than once only if NO_DEFAULT_P
658 is non-zero. */
659 do {
660 bytepos = pstartbyte;
661 ch = FETCH_CHAR (bytepos);
662 ch_len = CHAR_BYTES (ch);
663 pos = BYTE_TO_CHAR (bytepos);
664 type = bidi_get_type (ch, NEUTRAL_DIR);
665
666 for (pos++, bytepos += ch_len;
667 /* NOTE: UAX#9 says to search only for L, AL, or R types
668 of characters, and ignore RLE, RLO, LRE, and LRO.
669 However, I'm not sure it makes sense to omit those 4;
670 should try with and without that to see the effect. */
671 (bidi_get_category (type) != STRONG)
672 || (bidi_ignore_explicit_marks_for_paragraph_level
673 && (type == RLE || type == RLO
674 || type == LRE || type == LRO));
675 type = bidi_get_type (ch, NEUTRAL_DIR))
676 {
677 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
21fce5ab 678 break;
bea4f10c
EZ
679 if (bytepos >= ZV_BYTE)
680 {
681 /* Pretend there's a paragraph separator at end of
682 buffer. */
683 type = NEUTRAL_B;
684 break;
685 }
686 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
687 }
688 if (type == STRONG_R || type == STRONG_AL) /* P3 */
689 bidi_it->paragraph_dir = R2L;
690 else if (type == STRONG_L)
691 bidi_it->paragraph_dir = L2R;
692 if (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
693 {
694 /* If this paragraph is at BEGV, default to L2R. */
695 if (pstartbyte == BEGV_BYTE)
696 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
697 else
698 {
699 EMACS_INT prevpbyte = pstartbyte;
700 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
701
702 /* Find the beginning of the previous paragraph, if any. */
703 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
704 {
705 p--;
706 pbyte = CHAR_TO_BYTE (p);
707 prevpbyte = bidi_find_paragraph_start (p, pbyte);
708 }
709 pstartbyte = prevpbyte;
710 }
711 }
712 } while (no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
b7b65b15 713 }
be39f003
EZ
714 else
715 abort ();
b7b65b15 716
b44d9321
EZ
717 /* Contrary to UAX#9 clause P3, we only default the paragraph
718 direction to L2R if we have no previous usable paragraph
bea4f10c 719 direction. This is allowed by the HL1 clause. */
d20e1419 720 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
bea4f10c 721 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
be39f003 722 if (bidi_it->paragraph_dir == R2L)
b44d9321 723 bidi_it->level_stack[0].level = 1;
be39f003 724 else
b44d9321 725 bidi_it->level_stack[0].level = 0;
be39f003
EZ
726
727 bidi_line_init (bidi_it);
b7b65b15
EZ
728}
729
6bff6497
EZ
730/* Do whatever UAX#9 clause X8 says should be done at paragraph's
731 end. */
55d4c1b2 732static inline void
b7b65b15
EZ
733bidi_set_paragraph_end (struct bidi_it *bidi_it)
734{
735 bidi_it->invalid_levels = 0;
736 bidi_it->invalid_rl_levels = -1;
737 bidi_it->stack_idx = 0;
738 bidi_it->resolved_level = bidi_it->level_stack[0].level;
b7b65b15
EZ
739}
740
89d3374a 741/* Initialize the bidi iterator from buffer position CHARPOS. */
b7b65b15 742void
6bff6497 743bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
b7b65b15
EZ
744{
745 if (! bidi_initialized)
746 bidi_initialize ();
89d3374a
EZ
747 bidi_it->charpos = charpos;
748 bidi_it->bytepos = bytepos;
6bff6497
EZ
749 bidi_it->first_elt = 1;
750 bidi_set_paragraph_end (bidi_it);
751 bidi_it->new_paragraph = 1;
be39f003 752 bidi_it->separator_limit = -1;
b7b65b15 753 bidi_it->type = NEUTRAL_B;
ebb5722e
EZ
754 bidi_it->type_after_w1 = NEUTRAL_B;
755 bidi_it->orig_type = NEUTRAL_B;
b7b65b15 756 bidi_it->prev_was_pdf = 0;
ebb5722e
EZ
757 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
758 bidi_it->prev.orig_type = UNKNOWN_BT;
89d3374a
EZ
759 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
760 bidi_it->last_strong.orig_type = UNKNOWN_BT;
b7b65b15
EZ
761 bidi_it->next_for_neutral.charpos = -1;
762 bidi_it->next_for_neutral.type =
89d3374a
EZ
763 bidi_it->next_for_neutral.type_after_w1 =
764 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15
EZ
765 bidi_it->prev_for_neutral.charpos = -1;
766 bidi_it->prev_for_neutral.type =
89d3374a
EZ
767 bidi_it->prev_for_neutral.type_after_w1 =
768 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15 769 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
2fe72643 770 bidi_cache_shrink ();
b7b65b15
EZ
771}
772
773/* Push the current embedding level and override status; reset the
774 current level to LEVEL and the current override status to OVERRIDE. */
55d4c1b2 775static inline void
b7b65b15
EZ
776bidi_push_embedding_level (struct bidi_it *bidi_it,
777 int level, bidi_dir_t override)
778{
779 bidi_it->stack_idx++;
780 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
781 abort ();
782 bidi_it->level_stack[bidi_it->stack_idx].level = level;
783 bidi_it->level_stack[bidi_it->stack_idx].override = override;
784}
785
786/* Pop the embedding level and directional override status from the
787 stack, and return the new level. */
55d4c1b2 788static inline int
b7b65b15
EZ
789bidi_pop_embedding_level (struct bidi_it *bidi_it)
790{
791 /* UAX#9 says to ignore invalid PDFs. */
792 if (bidi_it->stack_idx > 0)
793 bidi_it->stack_idx--;
794 return bidi_it->level_stack[bidi_it->stack_idx].level;
795}
796
797/* Record in SAVED_INFO the information about the current character. */
55d4c1b2 798static inline void
b7b65b15
EZ
799bidi_remember_char (struct bidi_saved_info *saved_info,
800 struct bidi_it *bidi_it)
801{
802 saved_info->charpos = bidi_it->charpos;
803 saved_info->bytepos = bidi_it->bytepos;
804 saved_info->type = bidi_it->type;
2d6e4628 805 bidi_check_type (bidi_it->type);
89d3374a
EZ
806 saved_info->type_after_w1 = bidi_it->type_after_w1;
807 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 808 saved_info->orig_type = bidi_it->orig_type;
2d6e4628 809 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
810}
811
812/* Resolve the type of a neutral character according to the type of
813 surrounding strong text and the current embedding level. */
55d4c1b2 814static inline bidi_type_t
b7b65b15
EZ
815bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
816{
817 /* N1: European and Arabic numbers are treated as though they were R. */
818 if (next_type == WEAK_EN || next_type == WEAK_AN)
819 next_type = STRONG_R;
820 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
821 prev_type = STRONG_R;
822
823 if (next_type == prev_type) /* N1 */
824 return next_type;
825 else if ((lev & 1) == 0) /* N2 */
826 return STRONG_L;
827 else
828 return STRONG_R;
829}
830
55d4c1b2 831static inline int
b7b65b15
EZ
832bidi_explicit_dir_char (int c)
833{
834 /* FIXME: this should be replaced with a lookup table with suitable
835 bits set, like standard C ctype macros do. */
836 return (c == LRE_CHAR || c == LRO_CHAR
837 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
838}
839
840/* A helper function for bidi_resolve_explicit. It advances to the
841 next character in logical order and determines the new embedding
842 level and directional override, but does not take into account
843 empty embeddings. */
844static int
845bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
846{
847 int curchar;
848 bidi_type_t type;
849 int current_level;
850 int new_level;
851 bidi_dir_t override;
852
9c82e145
EZ
853 if (bidi_it->bytepos < BEGV_BYTE /* after reseat to BEGV? */
854 || bidi_it->first_elt)
e7402cb2 855 {
9c82e145
EZ
856 bidi_it->first_elt = 0;
857 if (bidi_it->charpos < BEGV)
858 bidi_it->charpos = BEGV;
859 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
e7402cb2 860 }
9c82e145 861 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
b7b65b15
EZ
862 {
863 bidi_it->charpos++;
e7402cb2
EZ
864 if (bidi_it->ch_len == 0)
865 abort ();
b7b65b15
EZ
866 bidi_it->bytepos += bidi_it->ch_len;
867 }
868
869 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
870 override = bidi_it->level_stack[bidi_it->stack_idx].override;
871 new_level = current_level;
872
873 /* in case it is a unibyte character (not yet implemented) */
874 /* _fetch_multibyte_char_len = 1; */
e7402cb2
EZ
875 if (bidi_it->bytepos >= ZV_BYTE)
876 {
877 curchar = BIDI_EOB;
878 bidi_it->ch_len = 1;
879 }
880 else
881 {
882 curchar = FETCH_CHAR (bidi_it->bytepos);
883 bidi_it->ch_len = CHAR_BYTES (curchar);
884 }
b7b65b15 885 bidi_it->ch = curchar;
b7b65b15 886
6bff6497
EZ
887 /* Don't apply directional override here, as all the types we handle
888 below will not be affected by the override anyway, and we need
889 the original type unaltered. The override will be applied in
890 bidi_resolve_weak. */
891 type = bidi_get_type (curchar, NEUTRAL_DIR);
89d3374a
EZ
892 bidi_it->orig_type = type;
893 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
894
895 if (type != PDF)
896 bidi_it->prev_was_pdf = 0;
897
89d3374a 898 bidi_it->type_after_w1 = UNKNOWN_BT;
b7b65b15
EZ
899
900 switch (type)
901 {
902 case RLE: /* X2 */
903 case RLO: /* X4 */
89d3374a
EZ
904 bidi_it->type_after_w1 = type;
905 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
906 type = WEAK_BN; /* X9/Retaining */
907 if (bidi_it->ignore_bn_limit <= 0)
908 {
909 if (current_level <= BIDI_MAXLEVEL - 4)
910 {
911 /* Compute the least odd embedding level greater than
912 the current level. */
913 new_level = ((current_level + 1) & ~1) + 1;
89d3374a 914 if (bidi_it->type_after_w1 == RLE)
b7b65b15
EZ
915 override = NEUTRAL_DIR;
916 else
917 override = R2L;
918 if (current_level == BIDI_MAXLEVEL - 4)
919 bidi_it->invalid_rl_levels = 0;
920 bidi_push_embedding_level (bidi_it, new_level, override);
921 }
922 else
923 {
924 bidi_it->invalid_levels++;
925 /* See the commentary about invalid_rl_levels below. */
926 if (bidi_it->invalid_rl_levels < 0)
927 bidi_it->invalid_rl_levels = 0;
928 bidi_it->invalid_rl_levels++;
929 }
930 }
89d3374a 931 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
932 || bidi_it->next_en_pos > bidi_it->charpos)
933 type = WEAK_EN;
934 break;
935 case LRE: /* X3 */
936 case LRO: /* X5 */
89d3374a
EZ
937 bidi_it->type_after_w1 = type;
938 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
939 type = WEAK_BN; /* X9/Retaining */
940 if (bidi_it->ignore_bn_limit <= 0)
941 {
942 if (current_level <= BIDI_MAXLEVEL - 5)
943 {
944 /* Compute the least even embedding level greater than
945 the current level. */
946 new_level = ((current_level + 2) & ~1);
89d3374a 947 if (bidi_it->type_after_w1 == LRE)
b7b65b15
EZ
948 override = NEUTRAL_DIR;
949 else
950 override = L2R;
951 bidi_push_embedding_level (bidi_it, new_level, override);
952 }
953 else
954 {
955 bidi_it->invalid_levels++;
956 /* invalid_rl_levels counts invalid levels encountered
957 while the embedding level was already too high for
958 LRE/LRO, but not for RLE/RLO. That is because
959 there may be exactly one PDF which we should not
960 ignore even though invalid_levels is non-zero.
961 invalid_rl_levels helps to know what PDF is
962 that. */
963 if (bidi_it->invalid_rl_levels >= 0)
964 bidi_it->invalid_rl_levels++;
965 }
966 }
89d3374a 967 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
968 || bidi_it->next_en_pos > bidi_it->charpos)
969 type = WEAK_EN;
970 break;
971 case PDF: /* X7 */
89d3374a
EZ
972 bidi_it->type_after_w1 = type;
973 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
974 type = WEAK_BN; /* X9/Retaining */
975 if (bidi_it->ignore_bn_limit <= 0)
976 {
977 if (!bidi_it->invalid_rl_levels)
978 {
979 new_level = bidi_pop_embedding_level (bidi_it);
980 bidi_it->invalid_rl_levels = -1;
981 if (bidi_it->invalid_levels)
982 bidi_it->invalid_levels--;
983 /* else nothing: UAX#9 says to ignore invalid PDFs */
984 }
985 if (!bidi_it->invalid_levels)
986 new_level = bidi_pop_embedding_level (bidi_it);
987 else
988 {
989 bidi_it->invalid_levels--;
990 bidi_it->invalid_rl_levels--;
991 }
992 }
89d3374a 993 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
994 || bidi_it->next_en_pos > bidi_it->charpos)
995 type = WEAK_EN;
996 break;
997 default:
998 /* Nothing. */
999 break;
1000 }
1001
1002 bidi_it->type = type;
2d6e4628 1003 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1004
1005 return new_level;
1006}
1007
1008/* Given an iterator state in BIDI_IT, advance one character position
1009 in the buffer to the next character (in the logical order), resolve
1010 any explicit embeddings and directional overrides, and return the
1011 embedding level of the character after resolving explicit
1012 directives and ignoring empty embeddings. */
1013static int
1014bidi_resolve_explicit (struct bidi_it *bidi_it)
1015{
1016 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1017 int new_level = bidi_resolve_explicit_1 (bidi_it);
1018
1019 if (prev_level < new_level
1020 && bidi_it->type == WEAK_BN
1021 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1502b819 1022 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
b7b65b15
EZ
1023 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1024 + bidi_it->ch_len)))
1025 {
1026 /* Avoid pushing and popping embedding levels if the level run
1027 is empty, as this breaks level runs where it shouldn't.
1028 UAX#9 removes all the explicit embedding and override codes,
1029 so empty embeddings disappear without a trace. We need to
1030 behave as if we did the same. */
1031 struct bidi_it saved_it;
1032 int level = prev_level;
1033
1034 bidi_copy_it (&saved_it, bidi_it);
1035
1036 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1037 + bidi_it->ch_len)))
1038 {
1039 level = bidi_resolve_explicit_1 (bidi_it);
1040 }
1041
1042 if (level == prev_level) /* empty embedding */
1043 saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1044 else /* this embedding is non-empty */
1045 saved_it.ignore_bn_limit = -1;
1046
1047 bidi_copy_it (bidi_it, &saved_it);
1048 if (bidi_it->ignore_bn_limit > 0)
1049 {
1050 /* We pushed a level, but we shouldn't have. Undo that. */
1051 if (!bidi_it->invalid_rl_levels)
1052 {
1053 new_level = bidi_pop_embedding_level (bidi_it);
1054 bidi_it->invalid_rl_levels = -1;
1055 if (bidi_it->invalid_levels)
1056 bidi_it->invalid_levels--;
1057 }
1058 if (!bidi_it->invalid_levels)
1059 new_level = bidi_pop_embedding_level (bidi_it);
1060 else
1061 {
1062 bidi_it->invalid_levels--;
1063 bidi_it->invalid_rl_levels--;
1064 }
1065 }
1066 }
1067
b7b65b15
EZ
1068 if (bidi_it->type == NEUTRAL_B) /* X8 */
1069 {
21fce5ab 1070 bidi_set_paragraph_end (bidi_it);
6bff6497
EZ
1071 /* This is needed by bidi_resolve_weak below, and in L1. */
1072 bidi_it->type_after_w1 = bidi_it->type;
89d3374a 1073 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1074 }
1075
1076 return new_level;
1077}
1078
1079/* Advance in the buffer, resolve weak types and return the type of
1080 the next character after weak type resolution. */
fd3998ff 1081static bidi_type_t
b7b65b15
EZ
1082bidi_resolve_weak (struct bidi_it *bidi_it)
1083{
1084 bidi_type_t type;
1085 bidi_dir_t override;
1086 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1087 int new_level = bidi_resolve_explicit (bidi_it);
1088 int next_char;
1089 bidi_type_t type_of_next;
1090 struct bidi_it saved_it;
1091
1092 type = bidi_it->type;
1093 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1094
1095 if (type == UNKNOWN_BT
1096 || type == LRE
1097 || type == LRO
1098 || type == RLE
1099 || type == RLO
1100 || type == PDF)
1101 abort ();
1102
1103 if (new_level != prev_level
1104 || bidi_it->type == NEUTRAL_B)
1105 {
1106 /* We've got a new embedding level run, compute the directional
1107 type of sor and initialize per-run variables (UAX#9, clause
1108 X10). */
1109 bidi_set_sor_type (bidi_it, prev_level, new_level);
1110 }
1111 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1112 || type == WEAK_BN || type == STRONG_AL)
89d3374a
EZ
1113 bidi_it->type_after_w1 = type; /* needed in L1 */
1114 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1115
1116 /* Level and directional override status are already recorded in
1117 bidi_it, and do not need any change; see X6. */
1118 if (override == R2L) /* X6 */
1119 type = STRONG_R;
1120 else if (override == L2R)
1121 type = STRONG_L;
bc5a45f3 1122 else
b7b65b15 1123 {
bc5a45f3 1124 if (type == WEAK_NSM) /* W1 */
b7b65b15 1125 {
bc5a45f3 1126 /* Note that we don't need to consider the case where the
5930fe97
EZ
1127 prev character has its type overridden by an RLO or LRO,
1128 because then either the type of this NSM would have been
1129 also overridden, or the previous character is outside the
1130 current level run, and thus not relevant to this NSM.
1131 This is why NSM gets the type_after_w1 of the previous
1132 character. */
ebb5722e
EZ
1133 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1134 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1135 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
5930fe97 1136 type = bidi_it->prev.type_after_w1;
bc5a45f3
EZ
1137 else if (bidi_it->sor == R2L)
1138 type = STRONG_R;
1139 else if (bidi_it->sor == L2R)
1140 type = STRONG_L;
1141 else /* shouldn't happen! */
1142 abort ();
b7b65b15 1143 }
bc5a45f3
EZ
1144 if (type == WEAK_EN /* W2 */
1145 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1146 type = WEAK_AN;
1147 else if (type == STRONG_AL) /* W3 */
1148 type = STRONG_R;
1149 else if ((type == WEAK_ES /* W4 */
1150 && bidi_it->prev.type_after_w1 == WEAK_EN
1151 && bidi_it->prev.orig_type == WEAK_EN)
1152 || (type == WEAK_CS
1153 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1154 && bidi_it->prev.orig_type == WEAK_EN)
1155 || bidi_it->prev.type_after_w1 == WEAK_AN)))
b7b65b15 1156 {
e7402cb2
EZ
1157 next_char =
1158 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1159 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
6bff6497 1160 type_of_next = bidi_get_type (next_char, override);
b7b65b15 1161
bc5a45f3 1162 if (type_of_next == WEAK_BN
b7b65b15
EZ
1163 || bidi_explicit_dir_char (next_char))
1164 {
1165 bidi_copy_it (&saved_it, bidi_it);
1166 while (bidi_resolve_explicit (bidi_it) == new_level
bc5a45f3 1167 && bidi_it->type == WEAK_BN)
b7b65b15
EZ
1168 ;
1169 type_of_next = bidi_it->type;
b7b65b15
EZ
1170 bidi_copy_it (bidi_it, &saved_it);
1171 }
bc5a45f3
EZ
1172
1173 /* If the next character is EN, but the last strong-type
1174 character is AL, that next EN will be changed to AN when
1175 we process it in W2 above. So in that case, this ES
1176 should not be changed into EN. */
1177 if (type == WEAK_ES
1178 && type_of_next == WEAK_EN
1179 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1180 type = WEAK_EN;
1181 else if (type == WEAK_CS)
b7b65b15 1182 {
bc5a45f3
EZ
1183 if (bidi_it->prev.type_after_w1 == WEAK_AN
1184 && (type_of_next == WEAK_AN
1185 /* If the next character is EN, but the last
1186 strong-type character is AL, EN will be later
1187 changed to AN when we process it in W2 above.
1188 So in that case, this ES should not be
1189 changed into EN. */
1190 || (type_of_next == WEAK_EN
1191 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1192 type = WEAK_AN;
1193 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1194 && type_of_next == WEAK_EN
1195 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1196 type = WEAK_EN;
1197 }
1198 }
1199 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1200 || type == WEAK_BN) /* W5/Retaining */
1201 {
1202 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1203 || bidi_it->next_en_pos > bidi_it->charpos)
1204 type = WEAK_EN;
1205 else /* W5: ET/BN with EN after it. */
1206 {
1207 EMACS_INT en_pos = bidi_it->charpos + 1;
1208
1209 next_char =
1210 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1211 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1212 type_of_next = bidi_get_type (next_char, override);
1213
1214 if (type_of_next == WEAK_ET
1215 || type_of_next == WEAK_BN
1216 || bidi_explicit_dir_char (next_char))
1217 {
1218 bidi_copy_it (&saved_it, bidi_it);
1219 while (bidi_resolve_explicit (bidi_it) == new_level
1220 && (bidi_it->type == WEAK_BN
1221 || bidi_it->type == WEAK_ET))
1222 ;
1223 type_of_next = bidi_it->type;
1224 en_pos = bidi_it->charpos;
1225 bidi_copy_it (bidi_it, &saved_it);
1226 }
1227 if (type_of_next == WEAK_EN)
b7b65b15 1228 {
bc5a45f3
EZ
1229 /* If the last strong character is AL, the EN we've
1230 found will become AN when we get to it (W2). */
1231 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1232 {
1233 type = WEAK_EN;
1234 /* Remember this EN position, to speed up processing
1235 of the next ETs. */
1236 bidi_it->next_en_pos = en_pos;
1237 }
1238 else if (type == WEAK_BN)
1239 type = NEUTRAL_ON; /* W6/Retaining */
b7b65b15 1240 }
b7b65b15
EZ
1241 }
1242 }
1243 }
1244
1245 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
89d3374a
EZ
1246 || (type == WEAK_BN
1247 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1248 || bidi_it->prev.type_after_w1 == WEAK_ES
1249 || bidi_it->prev.type_after_w1 == WEAK_ET)))
b7b65b15
EZ
1250 type = NEUTRAL_ON;
1251
1252 /* Store the type we've got so far, before we clobber it with strong
1253 types in W7 and while resolving neutral types. But leave alone
1254 the original types that were recorded above, because we will need
1255 them for the L1 clause. */
89d3374a
EZ
1256 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1257 bidi_it->type_after_w1 = type;
1258 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1259
1260 if (type == WEAK_EN) /* W7 */
1261 {
89d3374a 1262 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
b7b65b15
EZ
1263 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1264 type = STRONG_L;
1265 }
1266
1267 bidi_it->type = type;
2d6e4628 1268 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1269 return type;
1270}
1271
fd3998ff 1272static bidi_type_t
b7b65b15
EZ
1273bidi_resolve_neutral (struct bidi_it *bidi_it)
1274{
1275 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1276 bidi_type_t type = bidi_resolve_weak (bidi_it);
1277 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1278
1279 if (!(type == STRONG_R
1280 || type == STRONG_L
1281 || type == WEAK_BN
1282 || type == WEAK_EN
1283 || type == WEAK_AN
1284 || type == NEUTRAL_B
1285 || type == NEUTRAL_S
1286 || type == NEUTRAL_WS
1287 || type == NEUTRAL_ON))
1288 abort ();
1289
1290 if (bidi_get_category (type) == NEUTRAL
1291 || (type == WEAK_BN && prev_level == current_level))
1292 {
1293 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1294 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1295 bidi_it->next_for_neutral.type,
1296 current_level);
1297 else
1298 {
1299 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1300 the assumption of batch-style processing; see clauses W4,
1301 W5, and especially N1, which require to look far forward
1302 (as well as back) in the buffer. May the fleas of a
1303 thousand camels infest the armpits of those who design
1304 supposedly general-purpose algorithms by looking at their
1305 own implementations, and fail to consider other possible
1306 implementations! */
1307 struct bidi_it saved_it;
1308 bidi_type_t next_type;
1309
1310 if (bidi_it->scan_dir == -1)
1311 abort ();
1312
1313 bidi_copy_it (&saved_it, bidi_it);
1314 /* Scan the text forward until we find the first non-neutral
1315 character, and then use that to resolve the neutral we
1316 are dealing with now. We also cache the scanned iterator
1317 states, to salvage some of the effort later. */
1318 bidi_cache_iterator_state (bidi_it, 0);
1319 do {
1320 /* Record the info about the previous character, so that
1321 it will be cached below with this state. */
89d3374a 1322 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1323 && bidi_it->type != WEAK_BN)
1324 bidi_remember_char (&bidi_it->prev, bidi_it);
1325 type = bidi_resolve_weak (bidi_it);
1326 /* Paragraph separators have their levels fully resolved
1327 at this point, so cache them as resolved. */
1328 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1329 /* FIXME: implement L1 here, by testing for a newline and
1330 resetting the level for any sequence of whitespace
1331 characters adjacent to it. */
1332 } while (!(type == NEUTRAL_B
1333 || (type != WEAK_BN
1334 && bidi_get_category (type) != NEUTRAL)
1335 /* This is all per level run, so stop when we
1336 reach the end of this level run. */
1337 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1338 current_level));
1339
1340 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1341
1342 switch (type)
1343 {
1344 case STRONG_L:
1345 case STRONG_R:
1346 case STRONG_AL:
1347 next_type = type;
1348 break;
1349 case WEAK_EN:
1350 case WEAK_AN:
1351 /* N1: ``European and Arabic numbers are treated as
1352 though they were R.'' */
1353 next_type = STRONG_R;
1354 saved_it.next_for_neutral.type = STRONG_R;
1355 break;
1356 case WEAK_BN:
1357 if (!bidi_explicit_dir_char (bidi_it->ch))
1358 abort (); /* can't happen: BNs are skipped */
1359 /* FALLTHROUGH */
1360 case NEUTRAL_B:
1361 /* Marched all the way to the end of this level run.
1362 We need to use the eor type, whose information is
1363 stored by bidi_set_sor_type in the prev_for_neutral
1364 member. */
1365 if (saved_it.type != WEAK_BN
89d3374a 1366 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
b7b65b15
EZ
1367 {
1368 next_type = bidi_it->prev_for_neutral.type;
1369 saved_it.next_for_neutral.type = next_type;
2d6e4628 1370 bidi_check_type (next_type);
b7b65b15
EZ
1371 }
1372 else
1373 {
1374 /* This is a BN which does not adjoin neutrals.
1375 Leave its type alone. */
1376 bidi_copy_it (bidi_it, &saved_it);
1377 return bidi_it->type;
1378 }
1379 break;
1380 default:
1381 abort ();
1382 }
1383 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1384 next_type, current_level);
1385 saved_it.type = type;
2d6e4628 1386 bidi_check_type (type);
b7b65b15
EZ
1387 bidi_copy_it (bidi_it, &saved_it);
1388 }
1389 }
1390 return type;
1391}
1392
1393/* Given an iterator state in BIDI_IT, advance one character position
1394 in the buffer to the next character (in the logical order), resolve
1395 the bidi type of that next character, and return that type. */
fd3998ff 1396static bidi_type_t
b7b65b15
EZ
1397bidi_type_of_next_char (struct bidi_it *bidi_it)
1398{
1399 bidi_type_t type;
1400
1401 /* This should always be called during a forward scan. */
1402 if (bidi_it->scan_dir != 1)
1403 abort ();
1404
1405 /* Reset the limit until which to ignore BNs if we step out of the
1406 area where we found only empty levels. */
1407 if ((bidi_it->ignore_bn_limit > 0
1408 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1409 || (bidi_it->ignore_bn_limit == -1
1410 && !bidi_explicit_dir_char (bidi_it->ch)))
1411 bidi_it->ignore_bn_limit = 0;
1412
1413 type = bidi_resolve_neutral (bidi_it);
1414
1415 return type;
1416}
1417
1418/* Given an iterator state BIDI_IT, advance one character position in
1419 the buffer to the next character (in the logical order), resolve
1420 the embedding and implicit levels of that next character, and
1421 return the resulting level. */
fd3998ff 1422static int
b7b65b15
EZ
1423bidi_level_of_next_char (struct bidi_it *bidi_it)
1424{
1425 bidi_type_t type;
1426 int level, prev_level = -1;
1427 struct bidi_saved_info next_for_neutral;
1428
1429 if (bidi_it->scan_dir == 1)
1430 {
1431 /* There's no sense in trying to advance if we hit end of text. */
1502b819 1432 if (bidi_it->bytepos >= ZV_BYTE)
b7b65b15
EZ
1433 return bidi_it->resolved_level;
1434
1435 /* Record the info about the previous character. */
89d3374a 1436 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1437 && bidi_it->type != WEAK_BN)
1438 bidi_remember_char (&bidi_it->prev, bidi_it);
89d3374a
EZ
1439 if (bidi_it->type_after_w1 == STRONG_R
1440 || bidi_it->type_after_w1 == STRONG_L
1441 || bidi_it->type_after_w1 == STRONG_AL)
b7b65b15
EZ
1442 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1443 /* FIXME: it sounds like we don't need both prev and
1444 prev_for_neutral members, but I'm leaving them both for now. */
1445 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1446 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1447 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1448
1449 /* If we overstepped the characters used for resolving neutrals
1450 and whitespace, invalidate their info in the iterator. */
1451 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1452 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1453 if (bidi_it->next_en_pos >= 0
1454 && bidi_it->charpos >= bidi_it->next_en_pos)
1455 bidi_it->next_en_pos = -1;
1456 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1457 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1458 bidi_it->next_for_ws.type = UNKNOWN_BT;
1459
1460 /* This must be taken before we fill the iterator with the info
1461 about the next char. If we scan backwards, the iterator
1462 state must be already cached, so there's no need to know the
1463 embedding level of the previous character, since we will be
1464 returning to our caller shortly. */
1465 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1466 }
1467 next_for_neutral = bidi_it->next_for_neutral;
1468
1469 /* Perhaps it is already cached. */
1470 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1471 if (type != UNKNOWN_BT)
1472 {
1473 /* Don't lose the information for resolving neutrals! The
1474 cached states could have been cached before their
1475 next_for_neutral member was computed. If we are on our way
1476 forward, we can simply take the info from the previous
1477 state. */
1478 if (bidi_it->scan_dir == 1
1479 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1480 bidi_it->next_for_neutral = next_for_neutral;
1481
1482 /* If resolved_level is -1, it means this state was cached
1483 before it was completely resolved, so we cannot return
1484 it. */
1485 if (bidi_it->resolved_level != -1)
1486 return bidi_it->resolved_level;
1487 }
1488 if (bidi_it->scan_dir == -1)
1489 /* If we are going backwards, the iterator state is already cached
1490 from previous scans, and should be fully resolved. */
1491 abort ();
1492
1493 if (type == UNKNOWN_BT)
1494 type = bidi_type_of_next_char (bidi_it);
1495
1496 if (type == NEUTRAL_B)
1497 return bidi_it->resolved_level;
1498
1499 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1500 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1501 || (type == WEAK_BN && prev_level == level))
1502 {
1503 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1504 abort ();
1505
1506 /* If the cached state shows a neutral character, it was not
1507 resolved by bidi_resolve_neutral, so do it now. */
1508 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1509 bidi_it->next_for_neutral.type,
1510 level);
1511 }
1512
1513 if (!(type == STRONG_R
1514 || type == STRONG_L
1515 || type == WEAK_BN
1516 || type == WEAK_EN
1517 || type == WEAK_AN))
1518 abort ();
1519 bidi_it->type = type;
2d6e4628 1520 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1521
1522 /* For L1 below, we need to know, for each WS character, whether
0d327994 1523 it belongs to a sequence of WS characters preceding a newline
b7b65b15 1524 or a TAB or a paragraph separator. */
89d3374a 1525 if (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
1526 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1527 {
1528 int ch;
1529 int clen = bidi_it->ch_len;
6bff6497
EZ
1530 EMACS_INT bpos = bidi_it->bytepos;
1531 EMACS_INT cpos = bidi_it->charpos;
b7b65b15
EZ
1532 bidi_type_t chtype;
1533
1534 do {
1535 /*_fetch_multibyte_char_len = 1;*/
e7402cb2 1536 ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
b7b65b15
EZ
1537 bpos += clen;
1538 cpos++;
e7402cb2
EZ
1539 clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1540 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
b7b65b15
EZ
1541 chtype = NEUTRAL_B;
1542 else
6bff6497 1543 chtype = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15
EZ
1544 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1545 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1546 bidi_it->next_for_ws.type = chtype;
2d6e4628 1547 bidi_check_type (bidi_it->next_for_ws.type);
b7b65b15
EZ
1548 bidi_it->next_for_ws.charpos = cpos;
1549 bidi_it->next_for_ws.bytepos = bpos;
1550 }
1551
1552 /* Resolve implicit levels, with a twist: PDFs get the embedding
1553 level of the enbedding they terminate. See below for the
1554 reason. */
89d3374a 1555 if (bidi_it->orig_type == PDF
b7b65b15
EZ
1556 /* Don't do this if this formatting code didn't change the
1557 embedding level due to invalid or empty embeddings. */
1558 && prev_level != level)
1559 {
1560 /* Don't look in UAX#9 for the reason for this: it's our own
1561 private quirk. The reason is that we want the formatting
1562 codes to be delivered so that they bracket the text of their
1563 embedding. For example, given the text
1564
1565 {RLO}teST{PDF}
1566
1567 we want it to be displayed as
1568
0d68907d 1569 {PDF}STet{RLO}
b7b65b15
EZ
1570
1571 not as
1572
1573 STet{RLO}{PDF}
1574
1575 which will result because we bump up the embedding level as
1576 soon as we see the RLO and pop it as soon as we see the PDF,
1577 so RLO itself has the same embedding level as "teST", and
1578 thus would be normally delivered last, just before the PDF.
1579 The switch below fiddles with the level of PDF so that this
1580 ugly side effect does not happen.
1581
1582 (This is, of course, only important if the formatting codes
e7402cb2
EZ
1583 are actually displayed, but Emacs does need to display them
1584 if the user wants to.) */
b7b65b15
EZ
1585 level = prev_level;
1586 }
89d3374a
EZ
1587 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1588 || bidi_it->orig_type == NEUTRAL_S
e7402cb2
EZ
1589 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1590 /* || bidi_it->ch == LINESEP_CHAR */
89d3374a 1591 || (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
1592 && (bidi_it->next_for_ws.type == NEUTRAL_B
1593 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1594 level = bidi_it->level_stack[0].level;
1595 else if ((level & 1) == 0) /* I1 */
1596 {
1597 if (type == STRONG_R)
1598 level++;
1599 else if (type == WEAK_EN || type == WEAK_AN)
1600 level += 2;
1601 }
1602 else /* I2 */
1603 {
1604 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1605 level++;
1606 }
1607
1608 bidi_it->resolved_level = level;
1609 return level;
1610}
1611
1612/* Move to the other edge of a level given by LEVEL. If END_FLAG is
1613 non-zero, we are at the end of a level, and we need to prepare to
1614 resume the scan of the lower level.
1615
1616 If this level's other edge is cached, we simply jump to it, filling
1617 the iterator structure with the iterator state on the other edge.
1618 Otherwise, we walk the buffer until we come back to the same level
1619 as LEVEL.
1620
1621 Note: we are not talking here about a ``level run'' in the UAX#9
1622 sense of the term, but rather about a ``level'' which includes
1623 all the levels higher than it. In other words, given the levels
1624 like this:
1625
1626 11111112222222333333334443343222222111111112223322111
1627 A B C
1628
1629 and assuming we are at point A scanning left to right, this
1630 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1631 at point B. */
1632static void
1633bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1634{
1635 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1636 int idx;
1637
1638 /* Try the cache first. */
1639 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1640 bidi_cache_fetch_state (idx, bidi_it);
1641 else
1642 {
1643 int new_level;
1644
1645 if (end_flag)
1646 abort (); /* if we are at end of level, its edges must be cached */
1647
1648 bidi_cache_iterator_state (bidi_it, 1);
1649 do {
1650 new_level = bidi_level_of_next_char (bidi_it);
1651 bidi_cache_iterator_state (bidi_it, 1);
1652 } while (new_level >= level);
1653 }
1654}
1655
1656void
4b292a22 1657bidi_move_to_visually_next (struct bidi_it *bidi_it)
b7b65b15
EZ
1658{
1659 int old_level, new_level, next_level;
9c82e145 1660 struct bidi_it sentinel;
b7b65b15
EZ
1661
1662 if (bidi_it->scan_dir == 0)
1663 {
1664 bidi_it->scan_dir = 1; /* default to logical order */
1665 }
1666
be39f003
EZ
1667 /* If we just passed a newline, initialize for the next line. */
1668 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1669 bidi_line_init (bidi_it);
1670
6dcfd253
EZ
1671 /* Prepare the sentinel iterator state, and cache it. When we bump
1672 into it, scanning backwards, we'll know that the last non-base
1673 level is exhausted. */
b7b65b15 1674 if (bidi_cache_idx == 0)
9c82e145
EZ
1675 {
1676 bidi_copy_it (&sentinel, bidi_it);
1677 if (bidi_it->first_elt)
1678 {
1679 sentinel.charpos--; /* cached charpos needs to be monotonic */
1680 sentinel.bytepos--;
1681 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1682 sentinel.ch_len = 1;
1683 }
6dcfd253 1684 bidi_cache_iterator_state (&sentinel, 1);
9c82e145 1685 }
b7b65b15
EZ
1686
1687 old_level = bidi_it->resolved_level;
1688 new_level = bidi_level_of_next_char (bidi_it);
b7b65b15
EZ
1689
1690 /* Reordering of resolved levels (clause L2) is implemented by
1691 jumping to the other edge of the level and flipping direction of
c0546589 1692 scanning the text whenever we find a level change. */
b7b65b15
EZ
1693 if (new_level != old_level)
1694 {
1695 int ascending = new_level > old_level;
1696 int level_to_search = ascending ? old_level + 1 : old_level;
1697 int incr = ascending ? 1 : -1;
1698 int expected_next_level = old_level + incr;
1699
b7b65b15
EZ
1700 /* Jump (or walk) to the other edge of this level. */
1701 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1702 /* Switch scan direction and peek at the next character in the
1703 new direction. */
1704 bidi_it->scan_dir = -bidi_it->scan_dir;
1705
1706 /* The following loop handles the case where the resolved level
1707 jumps by more than one. This is typical for numbers inside a
1708 run of text with left-to-right embedding direction, but can
1709 also happen in other situations. In those cases the decision
1710 where to continue after a level change, and in what direction,
1711 is tricky. For example, given a text like below:
1712
1713 abcdefgh
1714 11336622
1715
1716 (where the numbers below the text show the resolved levels),
1717 the result of reordering according to UAX#9 should be this:
1718
1719 efdcghba
1720
1721 This is implemented by the loop below which flips direction
1722 and jumps to the other edge of the level each time it finds
1723 the new level not to be the expected one. The expected level
1724 is always one more or one less than the previous one. */
1725 next_level = bidi_peek_at_next_level (bidi_it);
1726 while (next_level != expected_next_level)
1727 {
1728 expected_next_level += incr;
1729 level_to_search += incr;
1730 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1731 bidi_it->scan_dir = -bidi_it->scan_dir;
1732 next_level = bidi_peek_at_next_level (bidi_it);
1733 }
1734
1735 /* Finally, deliver the next character in the new direction. */
1736 next_level = bidi_level_of_next_char (bidi_it);
1737 }
1738
b44d9321
EZ
1739 /* Take note when we have just processed the newline that precedes
1740 the end of the paragraph. The next time we are about to be
1741 called, set_iterator_to_next will automatically reinit the
1742 paragraph direction, if needed. We do this at the newline before
1743 the paragraph separator, because the next character might not be
1744 the first character of the next paragraph, due to the bidi
c0546589
EZ
1745 reordering, whereas we _must_ know the paragraph base direction
1746 _before_ we process the paragraph's text, since the base
1747 direction affects the reordering. */
6bff6497 1748 if (bidi_it->scan_dir == 1
be39f003
EZ
1749 && bidi_it->orig_type == NEUTRAL_B
1750 && bidi_it->bytepos < ZV_BYTE)
1751 {
1752 EMACS_INT sep_len =
1753 bidi_at_paragraph_end (bidi_it->charpos + 1,
1754 bidi_it->bytepos + bidi_it->ch_len);
1755 if (sep_len >= 0)
1756 {
1757 bidi_it->new_paragraph = 1;
b44d9321
EZ
1758 /* Record the buffer position of the last character of the
1759 paragraph separator. */
1760 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
be39f003
EZ
1761 }
1762 }
6bff6497 1763
b7b65b15
EZ
1764 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1765 {
1766 /* If we are at paragraph's base embedding level and beyond the
1767 last cached position, the cache's job is done and we can
1768 discard it. */
1769 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1770 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1771 bidi_cache_reset ();
1772 /* But as long as we are caching during forward scan, we must
1773 cache each state, or else the cache integrity will be
1774 compromised: it assumes cached states correspond to buffer
1775 positions 1:1. */
1776 else
1777 bidi_cache_iterator_state (bidi_it, 1);
1778 }
1779}
1780
1781/* This is meant to be called from within the debugger, whenever you
1782 wish to examine the cache contents. */
e78aecca 1783void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
b7b65b15
EZ
1784void
1785bidi_dump_cached_states (void)
1786{
1787 int i;
1788 int ndigits = 1;
1789
1790 if (bidi_cache_idx == 0)
1791 {
1792 fprintf (stderr, "The cache is empty.\n");
1793 return;
1794 }
1795 fprintf (stderr, "Total of %d state%s in cache:\n",
1796 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1797
1798 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1799 ndigits++;
1800 fputs ("ch ", stderr);
1801 for (i = 0; i < bidi_cache_idx; i++)
1802 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1803 fputs ("\n", stderr);
1804 fputs ("lvl ", stderr);
1805 for (i = 0; i < bidi_cache_idx; i++)
1806 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1807 fputs ("\n", stderr);
1808 fputs ("pos ", stderr);
1809 for (i = 0; i < bidi_cache_idx; i++)
c2982e87 1810 fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
b7b65b15
EZ
1811 fputs ("\n", stderr);
1812}