Remove from etc/NEWS the warning about display strings not being reordered.
[bpt/emacs.git] / src / bidi.c
CommitLineData
cbb09f04 1/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
73b0cd50 2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
b118e65f 3 Free Software Foundation, Inc.
b7b65b15
EZ
4
5This file is part of GNU Emacs.
6
a8d11bd3 7GNU Emacs is free software: you can redistribute it and/or modify
b7b65b15 8it under the terms of the GNU General Public License as published by
a8d11bd3
EZ
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
b7b65b15
EZ
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
b7b65b15 17You should have received a copy of the GNU General Public License
a8d11bd3 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
b7b65b15 19
2d6e4628
EZ
20/* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
cbb09f04 23 (UBA) as per UAX#9, a part of the Unicode Standard.
b7b65b15
EZ
24
25 Unlike the reference and most other implementations, this one is
940afb59
EZ
26 designed to be called once for every character in the buffer or
27 string.
b7b65b15 28
4b292a22 29 The main entry point is bidi_move_to_visually_next. Each time it
b7b65b15
EZ
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
4b292a22
EZ
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
b7b65b15
EZ
36 character by resolving their levels on the fly.
37
940afb59
EZ
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
89d3374a
EZ
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
46
b7b65b15
EZ
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
51
b7b65b15 52#include <config.h>
b7b65b15 53#include <stdio.h>
29e3d8d1
EZ
54#include <setjmp.h>
55
b7b65b15
EZ
56#include "lisp.h"
57#include "buffer.h"
58#include "character.h"
59#include "dispextern.h"
60
61static int bidi_initialized = 0;
62
cbc4fd20 63static Lisp_Object bidi_type_table, bidi_mirror_table;
b7b65b15
EZ
64
65#define LRM_CHAR 0x200E
66#define RLM_CHAR 0x200F
b7b65b15 67#define BIDI_EOB -1
b7b65b15 68
b7b65b15
EZ
69/* Data type for describing the bidirectional character categories. */
70typedef enum {
71 UNKNOWN_BC,
72 NEUTRAL,
73 WEAK,
74 STRONG
75} bidi_category_t;
76
e78aecca 77extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
b7b65b15
EZ
78int bidi_ignore_explicit_marks_for_paragraph_level = 1;
79
372b7a95 80static Lisp_Object paragraph_start_re, paragraph_separate_re;
6bff6497 81static Lisp_Object Qparagraph_start, Qparagraph_separate;
b7b65b15 82
cbb09f04
EZ
83\f
84/***********************************************************************
85 Utilities
86 ***********************************************************************/
b7b65b15 87
6bff6497
EZ
88/* Return the bidi type of a character CH, subject to the current
89 directional OVERRIDE. */
58b9f433 90static inline bidi_type_t
6bff6497 91bidi_get_type (int ch, bidi_dir_t override)
b7b65b15 92{
6bff6497
EZ
93 bidi_type_t default_type;
94
e7402cb2
EZ
95 if (ch == BIDI_EOB)
96 return NEUTRAL_B;
e342a24d
EZ
97 if (ch < 0 || ch > MAX_CHAR)
98 abort ();
6bff6497
EZ
99
100 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
101
102 if (override == NEUTRAL_DIR)
103 return default_type;
104
105 switch (default_type)
106 {
107 /* Although UAX#9 does not tell, it doesn't make sense to
108 override NEUTRAL_B and LRM/RLM characters. */
109 case NEUTRAL_B:
110 case LRE:
111 case LRO:
112 case RLE:
113 case RLO:
114 case PDF:
115 return default_type;
116 default:
117 switch (ch)
118 {
119 case LRM_CHAR:
120 case RLM_CHAR:
121 return default_type;
122 default:
123 if (override == L2R) /* X6 */
124 return STRONG_L;
125 else if (override == R2L)
126 return STRONG_R;
127 else
128 abort (); /* can't happen: handled above */
129 }
130 }
b7b65b15
EZ
131}
132
7d3b3862 133static void
2d6e4628
EZ
134bidi_check_type (bidi_type_t type)
135{
136 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
137 abort ();
138}
139
b7b65b15 140/* Given a bidi TYPE of a character, return its category. */
58b9f433 141static inline bidi_category_t
b7b65b15
EZ
142bidi_get_category (bidi_type_t type)
143{
144 switch (type)
145 {
146 case UNKNOWN_BT:
147 return UNKNOWN_BC;
148 case STRONG_L:
149 case STRONG_R:
150 case STRONG_AL:
151 case LRE:
152 case LRO:
153 case RLE:
154 case RLO:
155 return STRONG;
156 case PDF: /* ??? really?? */
157 case WEAK_EN:
158 case WEAK_ES:
159 case WEAK_ET:
160 case WEAK_AN:
161 case WEAK_CS:
162 case WEAK_NSM:
163 case WEAK_BN:
164 return WEAK;
165 case NEUTRAL_B:
166 case NEUTRAL_S:
167 case NEUTRAL_WS:
168 case NEUTRAL_ON:
169 return NEUTRAL;
170 default:
171 abort ();
172 }
173}
174
a6e8d97c
EZ
175/* Return the mirrored character of C, if it has one. If C has no
176 mirrored counterpart, return C.
177 Note: The conditions in UAX#9 clause L4 regarding the surrounding
178 context must be tested by the caller. */
b7b65b15
EZ
179int
180bidi_mirror_char (int c)
181{
cbc4fd20
EZ
182 Lisp_Object val;
183
184 if (c == BIDI_EOB)
185 return c;
186 if (c < 0 || c > MAX_CHAR)
187 abort ();
b7b65b15 188
cbc4fd20
EZ
189 val = CHAR_TABLE_REF (bidi_mirror_table, c);
190 if (INTEGERP (val))
b7b65b15 191 {
cbc4fd20 192 int v = XINT (val);
b7b65b15 193
cbc4fd20
EZ
194 if (v < 0 || v > MAX_CHAR)
195 abort ();
196
197 return v;
b7b65b15 198 }
cbc4fd20 199
b7b65b15
EZ
200 return c;
201}
202
cbb09f04
EZ
203/* Determine the start-of-run (sor) directional type given the two
204 embedding levels on either side of the run boundary. Also, update
205 the saved info about previously seen characters, since that info is
206 generally valid for a single level run. */
58b9f433 207static inline void
cbb09f04
EZ
208bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
209{
210 int higher_level = level_before > level_after ? level_before : level_after;
211
212 /* The prev_was_pdf gork is required for when we have several PDFs
213 in a row. In that case, we want to compute the sor type for the
214 next level run only once: when we see the first PDF. That's
215 because the sor type depends only on the higher of the two levels
216 that we find on the two sides of the level boundary (see UAX#9,
217 clause X10), and so we don't need to know the final embedding
218 level to which we descend after processing all the PDFs. */
219 if (!bidi_it->prev_was_pdf || level_before < level_after)
220 /* FIXME: should the default sor direction be user selectable? */
221 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
222 if (level_before > level_after)
223 bidi_it->prev_was_pdf = 1;
224
225 bidi_it->prev.type = UNKNOWN_BT;
226 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
227 bidi_it->last_strong.orig_type = UNKNOWN_BT;
228 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
229 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
230 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
231 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
232 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
233 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
234}
235
236/* Push the current embedding level and override status; reset the
237 current level to LEVEL and the current override status to OVERRIDE. */
58b9f433 238static inline void
cbb09f04
EZ
239bidi_push_embedding_level (struct bidi_it *bidi_it,
240 int level, bidi_dir_t override)
241{
242 bidi_it->stack_idx++;
7e2ad32c 243 xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
cbb09f04
EZ
244 bidi_it->level_stack[bidi_it->stack_idx].level = level;
245 bidi_it->level_stack[bidi_it->stack_idx].override = override;
246}
247
248/* Pop the embedding level and directional override status from the
249 stack, and return the new level. */
58b9f433 250static inline int
cbb09f04
EZ
251bidi_pop_embedding_level (struct bidi_it *bidi_it)
252{
253 /* UAX#9 says to ignore invalid PDFs. */
254 if (bidi_it->stack_idx > 0)
255 bidi_it->stack_idx--;
256 return bidi_it->level_stack[bidi_it->stack_idx].level;
257}
258
259/* Record in SAVED_INFO the information about the current character. */
58b9f433 260static inline void
cbb09f04
EZ
261bidi_remember_char (struct bidi_saved_info *saved_info,
262 struct bidi_it *bidi_it)
263{
264 saved_info->charpos = bidi_it->charpos;
265 saved_info->bytepos = bidi_it->bytepos;
266 saved_info->type = bidi_it->type;
267 bidi_check_type (bidi_it->type);
268 saved_info->type_after_w1 = bidi_it->type_after_w1;
269 bidi_check_type (bidi_it->type_after_w1);
270 saved_info->orig_type = bidi_it->orig_type;
271 bidi_check_type (bidi_it->orig_type);
272}
273
b7b65b15
EZ
274/* Copy the bidi iterator from FROM to TO. To save cycles, this only
275 copies the part of the level stack that is actually in use. */
58b9f433 276static inline void
b7b65b15
EZ
277bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
278{
279 int i;
280
e69a9370 281 /* Copy everything except the level stack and beyond. */
182ce2d2 282 memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
b7b65b15
EZ
283
284 /* Copy the active part of the level stack. */
285 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
286 for (i = 1; i <= from->stack_idx; i++)
287 to->level_stack[i] = from->level_stack[i];
288}
289
cbb09f04
EZ
290\f
291/***********************************************************************
292 Caching the bidi iterator states
293 ***********************************************************************/
b7b65b15 294
2fe72643
EZ
295#define BIDI_CACHE_CHUNK 200
296static struct bidi_it *bidi_cache;
297static size_t bidi_cache_size = 0;
0416466c 298static size_t elsz = sizeof (struct bidi_it);
87e67904
EZ
299static EMACS_INT bidi_cache_idx; /* next unused cache slot */
300static EMACS_INT bidi_cache_last_idx; /* slot of last cache hit */
301static EMACS_INT bidi_cache_start = 0; /* start of cache for this
302 "stack" level */
303
304/* Reset the cache state to the empty state. We only reset the part
305 of the cache relevant to iteration of the current object. Previous
306 objects, which are pushed on the display iterator's stack, are left
307 intact. This is called when the cached information is no more
308 useful for the current iteration, e.g. when we were reseated to a
309 new position on the same object. */
58b9f433 310static inline void
b7b65b15
EZ
311bidi_cache_reset (void)
312{
87e67904 313 bidi_cache_idx = bidi_cache_start;
b7b65b15
EZ
314 bidi_cache_last_idx = -1;
315}
316
87e67904
EZ
317/* Shrink the cache to its minimal size. Called when we init the bidi
318 iterator for reordering a buffer or a string that does not come
319 from display properties, because that means all the previously
320 cached info is of no further use. */
58b9f433 321static inline void
2fe72643
EZ
322bidi_cache_shrink (void)
323{
324 if (bidi_cache_size > BIDI_CACHE_CHUNK)
325 {
0416466c
EZ
326 bidi_cache_size = BIDI_CACHE_CHUNK;
327 bidi_cache =
328 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643
EZ
329 }
330 bidi_cache_reset ();
331}
332
58b9f433 333static inline void
b7b65b15
EZ
334bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
335{
336 int current_scan_dir = bidi_it->scan_dir;
337
87e67904 338 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
b7b65b15
EZ
339 abort ();
340
341 bidi_copy_it (bidi_it, &bidi_cache[idx]);
342 bidi_it->scan_dir = current_scan_dir;
343 bidi_cache_last_idx = idx;
344}
345
346/* Find a cached state with a given CHARPOS and resolved embedding
347 level less or equal to LEVEL. if LEVEL is -1, disregard the
348 resolved levels in cached states. DIR, if non-zero, means search
349 in that direction from the last cache hit. */
58b9f433 350static inline int
61bfec98 351bidi_cache_search (EMACS_INT charpos, int level, int dir)
b7b65b15
EZ
352{
353 int i, i_start;
354
6eec7596 355 if (bidi_cache_idx > bidi_cache_start)
b7b65b15 356 {
6eec7596
EZ
357 if (bidi_cache_last_idx == -1)
358 bidi_cache_last_idx = bidi_cache_idx - 1;
b7b65b15 359 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
182ce2d2
EZ
360 {
361 dir = -1;
362 i_start = bidi_cache_last_idx - 1;
363 }
364 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
365 + bidi_cache[bidi_cache_last_idx].nchars - 1))
366 {
367 dir = 1;
368 i_start = bidi_cache_last_idx + 1;
369 }
370 else if (dir)
b7b65b15
EZ
371 i_start = bidi_cache_last_idx;
372 else
373 {
374 dir = -1;
375 i_start = bidi_cache_idx - 1;
376 }
377
378 if (dir < 0)
379 {
380 /* Linear search for now; FIXME! */
87e67904 381 for (i = i_start; i >= bidi_cache_start; i--)
182ce2d2
EZ
382 if (bidi_cache[i].charpos <= charpos
383 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
b7b65b15
EZ
384 && (level == -1 || bidi_cache[i].resolved_level <= level))
385 return i;
386 }
387 else
388 {
389 for (i = i_start; i < bidi_cache_idx; i++)
182ce2d2
EZ
390 if (bidi_cache[i].charpos <= charpos
391 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
b7b65b15
EZ
392 && (level == -1 || bidi_cache[i].resolved_level <= level))
393 return i;
394 }
395 }
396
397 return -1;
398}
399
400/* Find a cached state where the resolved level changes to a value
401 that is lower than LEVEL, and return its cache slot index. DIR is
402 the direction to search, starting with the last used cache slot.
87e67904
EZ
403 If DIR is zero, we search backwards from the last occupied cache
404 slot. BEFORE, if non-zero, means return the index of the slot that
405 is ``before'' the level change in the search direction. That is,
b7b65b15
EZ
406 given the cached levels like this:
407
408 1122333442211
409 AB C
410
411 and assuming we are at the position cached at the slot marked with
412 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
413 index of slot B or A, depending whether BEFORE is, respectively,
414 non-zero or zero. */
415static int
416bidi_cache_find_level_change (int level, int dir, int before)
417{
418 if (bidi_cache_idx)
419 {
420 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
421 int incr = before ? 1 : 0;
422
6eec7596
EZ
423 xassert (!dir || bidi_cache_last_idx >= 0);
424
b7b65b15
EZ
425 if (!dir)
426 dir = -1;
427 else if (!incr)
428 i += dir;
429
430 if (dir < 0)
431 {
87e67904 432 while (i >= bidi_cache_start + incr)
b7b65b15
EZ
433 {
434 if (bidi_cache[i - incr].resolved_level >= 0
435 && bidi_cache[i - incr].resolved_level < level)
436 return i;
437 i--;
438 }
439 }
440 else
441 {
442 while (i < bidi_cache_idx - incr)
443 {
444 if (bidi_cache[i + incr].resolved_level >= 0
445 && bidi_cache[i + incr].resolved_level < level)
446 return i;
447 i++;
448 }
449 }
450 }
451
452 return -1;
453}
454
58b9f433
EZ
455static inline void
456bidi_cache_ensure_space (int idx)
457{
458 /* Enlarge the cache as needed. */
459 if (idx >= bidi_cache_size)
460 {
57b3e30b
EZ
461 while (idx >= bidi_cache_size)
462 bidi_cache_size += BIDI_CACHE_CHUNK;
58b9f433
EZ
463 bidi_cache =
464 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
465 }
466}
467
468static inline void
b7b65b15
EZ
469bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
470{
471 int idx;
472
473 /* We should never cache on backward scans. */
474 if (bidi_it->scan_dir == -1)
475 abort ();
476 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
477
478 if (idx < 0)
479 {
480 idx = bidi_cache_idx;
58b9f433 481 bidi_cache_ensure_space (idx);
bd924a5d
EZ
482 /* Character positions should correspond to cache positions 1:1.
483 If we are outside the range of cached positions, the cache is
484 useless and must be reset. */
87e67904 485 if (idx > bidi_cache_start &&
182ce2d2
EZ
486 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
487 + bidi_cache[idx - 1].nchars)
a1344e7d 488 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
bd924a5d
EZ
489 {
490 bidi_cache_reset ();
87e67904 491 idx = bidi_cache_start;
bd924a5d 492 }
102ebb00
EZ
493 if (bidi_it->nchars <= 0)
494 abort ();
b7b65b15
EZ
495 bidi_copy_it (&bidi_cache[idx], bidi_it);
496 if (!resolved)
497 bidi_cache[idx].resolved_level = -1;
498 }
499 else
500 {
501 /* Copy only the members which could have changed, to avoid
502 costly copying of the entire struct. */
503 bidi_cache[idx].type = bidi_it->type;
2d6e4628 504 bidi_check_type (bidi_it->type);
89d3374a
EZ
505 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
506 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
507 if (resolved)
508 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
509 else
510 bidi_cache[idx].resolved_level = -1;
511 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
512 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
513 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
514 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
515 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
516 }
517
518 bidi_cache_last_idx = idx;
519 if (idx >= bidi_cache_idx)
520 bidi_cache_idx = idx + 1;
521}
522
58b9f433 523static inline bidi_type_t
61bfec98 524bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
b7b65b15
EZ
525{
526 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
527
87e67904 528 if (i >= bidi_cache_start)
b7b65b15
EZ
529 {
530 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
531
5009f85e 532 bidi_copy_it (bidi_it, &bidi_cache[i]);
b7b65b15
EZ
533 bidi_cache_last_idx = i;
534 /* Don't let scan direction from from the cached state override
535 the current scan direction. */
536 bidi_it->scan_dir = current_scan_dir;
537 return bidi_it->type;
538 }
539
540 return UNKNOWN_BT;
541}
542
58b9f433 543static inline int
b7b65b15
EZ
544bidi_peek_at_next_level (struct bidi_it *bidi_it)
545{
87e67904 546 if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
b7b65b15
EZ
547 abort ();
548 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
549}
550
cbb09f04 551\f
58b9f433
EZ
552/***********************************************************************
553 Pushing and popping the bidi iterator state
554 ***********************************************************************/
0c22566f
EZ
555/* 5-slot stack for saving the start of the previous level of the
556 cache. xdisp.c maintains a 5-slot stack for its iterator state,
557 and we need the same size of our stack. */
558static int bidi_cache_start_stack[IT_STACK_SIZE];
58b9f433
EZ
559static int bidi_cache_sp;
560
561/* Push the bidi iterator state in preparation for reordering a
562 different object, e.g. display string found at certain buffer
7e2ad32c
EZ
563 position. Pushing the bidi iterator boils down to saving its
564 entire state on the cache and starting a new cache "stacked" on top
565 of the current cache. */
58b9f433
EZ
566void
567bidi_push_it (struct bidi_it *bidi_it)
568{
569 /* Save the current iterator state in its entirety after the last
570 used cache slot. */
571 bidi_cache_ensure_space (bidi_cache_idx);
572 memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
573
574 /* Push the current cache start onto the stack. */
7e2ad32c 575 xassert (bidi_cache_sp < IT_STACK_SIZE);
58b9f433
EZ
576 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
577
578 /* Start a new level of cache, and make it empty. */
579 bidi_cache_start = bidi_cache_idx;
580 bidi_cache_last_idx = -1;
581}
582
583/* Restore the iterator state saved by bidi_push_it and return the
584 cache to the corresponding state. */
585void
586bidi_pop_it (struct bidi_it *bidi_it)
587{
588 if (bidi_cache_start <= 0)
589 abort ();
590
591 /* Reset the next free cache slot index to what it was before the
592 call to bidi_push_it. */
593 bidi_cache_idx = bidi_cache_start - 1;
594
595 /* Restore the bidi iterator state saved in the cache. */
596 memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
597
598 /* Pop the previous cache start from the stack. */
599 if (bidi_cache_sp <= 0)
600 abort ();
601 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
602
603 /* Invalidate the last-used cache slot data. */
604 bidi_cache_last_idx = -1;
605}
606
ed94e6d7
EZ
607/* Stash away a copy of the cache and its control variables. */
608void *
609bidi_shelve_cache (void)
610{
611 unsigned char *databuf;
612
613 if (bidi_cache_idx == 0)
614 return NULL;
615
616 databuf = xmalloc (sizeof (bidi_cache_idx)
617 + bidi_cache_idx * sizeof (struct bidi_it)
618 + sizeof (bidi_cache_start_stack)
619 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
620 + sizeof (bidi_cache_last_idx));
621 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
622 memcpy (databuf + sizeof (bidi_cache_idx),
623 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
624 memcpy (databuf + sizeof (bidi_cache_idx)
625 + bidi_cache_idx * sizeof (struct bidi_it),
626 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
627 memcpy (databuf + sizeof (bidi_cache_idx)
628 + bidi_cache_idx * sizeof (struct bidi_it)
629 + sizeof (bidi_cache_start_stack),
630 &bidi_cache_sp, sizeof (bidi_cache_sp));
631 memcpy (databuf + sizeof (bidi_cache_idx)
632 + bidi_cache_idx * sizeof (struct bidi_it)
633 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
634 &bidi_cache_start, sizeof (bidi_cache_start));
635 memcpy (databuf + sizeof (bidi_cache_idx)
636 + bidi_cache_idx * sizeof (struct bidi_it)
637 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
638 + sizeof (bidi_cache_start),
639 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
640
641 return databuf;
642}
643
644/* Restore the cache state from a copy stashed away by bidi_shelve_cache. */
645void
646bidi_unshelve_cache (void *databuf)
647{
648 unsigned char *p = databuf;
649
650 if (!p)
651 {
652 /* A NULL pointer means an empty cache. */
653 bidi_cache_start = 0;
654 bidi_cache_sp = 0;
655 bidi_cache_reset ();
656 }
657 else
658 {
659 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
57b3e30b 660 bidi_cache_ensure_space (bidi_cache_idx);
ed94e6d7
EZ
661 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
662 bidi_cache_idx * sizeof (struct bidi_it));
663 memcpy (bidi_cache_start_stack,
664 p + sizeof (bidi_cache_idx)
665 + bidi_cache_idx * sizeof (struct bidi_it),
666 sizeof (bidi_cache_start_stack));
667 memcpy (&bidi_cache_sp,
668 p + sizeof (bidi_cache_idx)
669 + bidi_cache_idx * sizeof (struct bidi_it)
670 + sizeof (bidi_cache_start_stack),
671 sizeof (bidi_cache_sp));
672 memcpy (&bidi_cache_start,
673 p + sizeof (bidi_cache_idx)
674 + bidi_cache_idx * sizeof (struct bidi_it)
675 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
676 sizeof (bidi_cache_start));
677 memcpy (&bidi_cache_last_idx,
678 p + sizeof (bidi_cache_idx)
679 + bidi_cache_idx * sizeof (struct bidi_it)
680 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
681 + sizeof (bidi_cache_start),
682 sizeof (bidi_cache_last_idx));
683
684 xfree (p);
685 }
686}
687
58b9f433 688\f
cbb09f04
EZ
689/***********************************************************************
690 Initialization
691 ***********************************************************************/
692static void
693bidi_initialize (void)
b7b65b15 694{
be39f003 695
cbb09f04
EZ
696#include "biditype.h"
697#include "bidimirror.h"
be39f003 698
cbb09f04 699 int i;
b7b65b15 700
cbb09f04
EZ
701 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
702 staticpro (&bidi_type_table);
703
704 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
705 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
706 make_number (bidi_type[i].type));
707
708 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
709 staticpro (&bidi_mirror_table);
710
711 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
712 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
713 make_number (bidi_mirror[i].to));
714
715 Qparagraph_start = intern ("paragraph-start");
716 staticpro (&Qparagraph_start);
717 paragraph_start_re = Fsymbol_value (Qparagraph_start);
718 if (!STRINGP (paragraph_start_re))
719 paragraph_start_re = build_string ("\f\\|[ \t]*$");
720 staticpro (&paragraph_start_re);
721 Qparagraph_separate = intern ("paragraph-separate");
722 staticpro (&Qparagraph_separate);
723 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
724 if (!STRINGP (paragraph_separate_re))
725 paragraph_separate_re = build_string ("[ \t\f]*$");
726 staticpro (&paragraph_separate_re);
58b9f433
EZ
727
728 bidi_cache_sp = 0;
729
cbb09f04 730 bidi_initialized = 1;
b7b65b15
EZ
731}
732
cbb09f04
EZ
733/* Do whatever UAX#9 clause X8 says should be done at paragraph's
734 end. */
58b9f433 735static inline void
cbb09f04 736bidi_set_paragraph_end (struct bidi_it *bidi_it)
b7b65b15 737{
cbb09f04
EZ
738 bidi_it->invalid_levels = 0;
739 bidi_it->invalid_rl_levels = -1;
740 bidi_it->stack_idx = 0;
741 bidi_it->resolved_level = bidi_it->level_stack[0].level;
742}
b7b65b15 743
cbb09f04
EZ
744/* Initialize the bidi iterator from buffer/string position CHARPOS. */
745void
746bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
747 struct bidi_it *bidi_it)
748{
749 if (! bidi_initialized)
750 bidi_initialize ();
751 if (charpos >= 0)
752 bidi_it->charpos = charpos;
753 if (bytepos >= 0)
754 bidi_it->bytepos = bytepos;
755 bidi_it->frame_window_p = frame_window_p;
756 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
757 bidi_it->first_elt = 1;
758 bidi_set_paragraph_end (bidi_it);
759 bidi_it->new_paragraph = 1;
760 bidi_it->separator_limit = -1;
761 bidi_it->type = NEUTRAL_B;
762 bidi_it->type_after_w1 = NEUTRAL_B;
763 bidi_it->orig_type = NEUTRAL_B;
764 bidi_it->prev_was_pdf = 0;
765 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
766 bidi_it->prev.orig_type = UNKNOWN_BT;
89d3374a
EZ
767 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
768 bidi_it->last_strong.orig_type = UNKNOWN_BT;
cbb09f04
EZ
769 bidi_it->next_for_neutral.charpos = -1;
770 bidi_it->next_for_neutral.type =
771 bidi_it->next_for_neutral.type_after_w1 =
89d3374a 772 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
cbb09f04
EZ
773 bidi_it->prev_for_neutral.charpos = -1;
774 bidi_it->prev_for_neutral.type =
775 bidi_it->prev_for_neutral.type_after_w1 =
776 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
777 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
778 bidi_it->disp_pos = -1; /* invalid/unknown */
779 /* We can only shrink the cache if we are at the bottom level of its
780 "stack". */
781 if (bidi_cache_start == 0)
782 bidi_cache_shrink ();
58b9f433
EZ
783 else
784 bidi_cache_reset ();
b7b65b15
EZ
785}
786
182ce2d2 787/* Perform initializations for reordering a new line of bidi text. */
6bff6497 788static void
be39f003
EZ
789bidi_line_init (struct bidi_it *bidi_it)
790{
791 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
792 bidi_it->resolved_level = bidi_it->level_stack[0].level;
793 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
794 bidi_it->invalid_levels = 0;
795 bidi_it->invalid_rl_levels = -1;
796 bidi_it->next_en_pos = -1;
797 bidi_it->next_for_ws.type = UNKNOWN_BT;
b44d9321
EZ
798 bidi_set_sor_type (bidi_it,
799 bidi_it->paragraph_dir == R2L ? 1 : 0,
be39f003
EZ
800 bidi_it->level_stack[0].level); /* X10 */
801
802 bidi_cache_reset ();
803}
804
cbb09f04
EZ
805\f
806/***********************************************************************
807 Fetching characters
808 ***********************************************************************/
809
f3014ef5
EZ
810/* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
811 are zero-based character positions in S, BEGBYTE is byte position
812 corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
813 string. */
87e67904
EZ
814static inline EMACS_INT
815bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
f3014ef5 816 const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
87e67904
EZ
817{
818 EMACS_INT pos = beg;
819 const unsigned char *p = s + begbyte, *start = p;
820
f3014ef5
EZ
821 if (unibyte)
822 p = s + end;
823 else
87e67904 824 {
f3014ef5
EZ
825 if (!CHAR_HEAD_P (*p))
826 abort ();
827
828 while (pos < end)
829 {
830 p += BYTES_BY_CHAR_HEAD (*p);
831 pos++;
832 }
87e67904
EZ
833 }
834
835 return p - start;
836}
837
838/* Fetch and returns the character at byte position BYTEPOS. If S is
839 non-NULL, fetch the character from string S; otherwise fetch the
f3014ef5
EZ
840 character from the current buffer. UNIBYTE non-zero means S is a
841 unibyte string. */
87e67904 842static inline int
f3014ef5 843bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
87e67904
EZ
844{
845 if (s)
f3014ef5
EZ
846 {
847 if (unibyte)
848 return s[bytepos];
849 else
850 return STRING_CHAR (s + bytepos);
851 }
87e67904
EZ
852 else
853 return FETCH_MULTIBYTE_CHAR (bytepos);
854}
855
102ebb00
EZ
856/* Fetch and return the character at BYTEPOS/CHARPOS. If that
857 character is covered by a display string, treat the entire run of
858 covered characters as a single character u+FFFC, and return their
859 combined length in CH_LEN and NCHARS. DISP_POS specifies the
860 character position of the next display string, or -1 if not yet
861 computed. When the next character is at or beyond that position,
862 the function updates DISP_POS with the position of the next display
9f257352
EZ
863 string. STRING->s is the C string to iterate, or NULL if iterating
864 over a buffer or a Lisp string; in the latter case, STRING->lstring
865 is the Lisp string. */
87e67904 866static inline int
102ebb00 867bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
87e67904 868 struct bidi_string_data *string,
fc6f18ce 869 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
182ce2d2
EZ
870{
871 int ch;
9f257352
EZ
872 EMACS_INT endpos =
873 (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
6db161be 874 struct text_pos pos;
182ce2d2 875
182ce2d2 876 /* If we got past the last known position of display string, compute
87e67904
EZ
877 the position of the next one. That position could be at CHARPOS. */
878 if (charpos < endpos && charpos > *disp_pos)
6db161be
EZ
879 {
880 SET_TEXT_POS (pos, charpos, bytepos);
881 *disp_pos = compute_display_string_pos (&pos, string, frame_window_p);
882 }
102ebb00
EZ
883
884 /* Fetch the character at BYTEPOS. */
87e67904 885 if (charpos >= endpos)
182ce2d2
EZ
886 {
887 ch = BIDI_EOB;
888 *ch_len = 1;
889 *nchars = 1;
87e67904 890 *disp_pos = endpos;
182ce2d2 891 }
102ebb00 892 else if (charpos >= *disp_pos)
182ce2d2 893 {
7b600102
EZ
894 EMACS_INT disp_end_pos;
895
896 /* We don't expect to find ourselves in the middle of a display
897 property. Hopefully, it will never be needed. */
898 if (charpos > *disp_pos)
899 abort ();
900 /* Return the Unicode Object Replacement Character to represent
87e67904 901 the entire run of characters covered by the display string. */
7b600102 902 ch = 0xFFFC;
87e67904 903 disp_end_pos = compute_display_string_end (*disp_pos, string);
7b600102 904 *nchars = disp_end_pos - *disp_pos;
f3014ef5
EZ
905 if (*nchars <= 0)
906 abort ();
87e67904
EZ
907 if (string->s)
908 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
f3014ef5 909 disp_end_pos, string->unibyte);
9f257352
EZ
910 else if (STRINGP (string->lstring))
911 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
f3014ef5 912 bytepos, disp_end_pos, string->unibyte);
87e67904
EZ
913 else
914 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
182ce2d2 915 }
182ce2d2
EZ
916 else
917 {
87e67904
EZ
918 if (string->s)
919 {
920 EMACS_INT len;
921
f3014ef5
EZ
922 if (!string->unibyte)
923 {
924 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
925 *ch_len = len;
926 }
927 else
928 {
929 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
930 *ch_len = 1;
931 }
87e67904 932 }
9f257352
EZ
933 else if (STRINGP (string->lstring))
934 {
935 EMACS_INT len;
936
f3014ef5
EZ
937 if (!string->unibyte)
938 {
939 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
940 len);
941 *ch_len = len;
942 }
943 else
944 {
945 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
946 *ch_len = 1;
947 }
9f257352 948 }
87e67904
EZ
949 else
950 {
951 ch = FETCH_MULTIBYTE_CHAR (bytepos);
952 *ch_len = CHAR_BYTES (ch);
953 }
182ce2d2
EZ
954 *nchars = 1;
955 }
956
957 /* If we just entered a run of characters covered by a display
958 string, compute the position of the next display string. */
87e67904 959 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos)
6db161be
EZ
960 {
961 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
962 *disp_pos = compute_display_string_pos (&pos, string, frame_window_p);
963 }
182ce2d2
EZ
964
965 return ch;
966}
967
cbb09f04
EZ
968\f
969/***********************************************************************
970 Determining paragraph direction
971 ***********************************************************************/
972
973/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
974 Value is the non-negative length of the paragraph separator
975 following the buffer position, -1 if position is at the beginning
976 of a new paragraph, or -2 if position is neither at beginning nor
977 at end of a paragraph. */
978static EMACS_INT
979bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
980{
981 Lisp_Object sep_re;
982 Lisp_Object start_re;
983 EMACS_INT val;
984
985 sep_re = paragraph_separate_re;
986 start_re = paragraph_start_re;
987
988 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
989 if (val < 0)
990 {
991 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
992 val = -1;
993 else
994 val = -2;
995 }
996
997 return val;
998}
999
be39f003
EZ
1000/* Find the beginning of this paragraph by looking back in the buffer.
1001 Value is the byte position of the paragraph's beginning. */
1002static EMACS_INT
b44d9321 1003bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
6bff6497 1004{
372b7a95 1005 Lisp_Object re = paragraph_start_re;
6bff6497
EZ
1006 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1007
6bff6497
EZ
1008 while (pos_byte > BEGV_BYTE
1009 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1010 {
182ce2d2
EZ
1011 /* FIXME: What if the paragraph beginning is covered by a
1012 display string? And what if a display string covering some
1013 of the text over which we scan back includes
1014 paragraph_start_re? */
be39f003
EZ
1015 pos = find_next_newline_no_quit (pos - 1, -1);
1016 pos_byte = CHAR_TO_BYTE (pos);
6bff6497 1017 }
be39f003 1018 return pos_byte;
6bff6497
EZ
1019}
1020
bea4f10c 1021/* Determine the base direction, a.k.a. base embedding level, of the
b44d9321
EZ
1022 paragraph we are about to iterate through. If DIR is either L2R or
1023 R2L, just use that. Otherwise, determine the paragraph direction
bea4f10c
EZ
1024 from the first strong directional character of the paragraph.
1025
182ce2d2 1026 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
bea4f10c
EZ
1027 has no strong directional characters and both DIR and
1028 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1029 in the buffer until a paragraph is found with a strong character,
1030 or until hitting BEGV. In the latter case, fall back to L2R. This
1031 flag is used in current-bidi-paragraph-direction.
1032
1033 Note that this function gives the paragraph separator the same
1034 direction as the preceding paragraph, even though Emacs generally
1035 views the separartor as not belonging to any paragraph. */
b7b65b15 1036void
bea4f10c 1037bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
b7b65b15 1038{
6bff6497 1039 EMACS_INT bytepos = bidi_it->bytepos;
9f257352 1040 int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
bea4f10c 1041 EMACS_INT pstartbyte;
87e67904
EZ
1042 /* Note that begbyte is a byte position, while end is a character
1043 position. Yes, this is ugly, but we are trying to avoid costly
1044 calls to BYTE_TO_CHAR and its ilk. */
1045 EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
1046 EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
e7402cb2 1047
5e65aec0 1048 /* Special case for an empty buffer. */
87e67904 1049 if (bytepos == begbyte && bidi_it->charpos == end)
5e65aec0 1050 dir = L2R;
9c82e145 1051 /* We should never be called at EOB or before BEGV. */
87e67904 1052 else if (bidi_it->charpos >= end || bytepos < begbyte)
9c82e145
EZ
1053 abort ();
1054
be39f003
EZ
1055 if (dir == L2R)
1056 {
1057 bidi_it->paragraph_dir = L2R;
1058 bidi_it->new_paragraph = 0;
1059 }
1060 else if (dir == R2L)
1061 {
1062 bidi_it->paragraph_dir = R2L;
1063 bidi_it->new_paragraph = 0;
1064 }
b7b65b15
EZ
1065 else if (dir == NEUTRAL_DIR) /* P2 */
1066 {
182ce2d2
EZ
1067 int ch;
1068 EMACS_INT ch_len, nchars;
1069 EMACS_INT pos, disp_pos = -1;
6bff6497 1070 bidi_type_t type;
9f257352 1071 const unsigned char *s;
be39f003 1072
d20e1419
EZ
1073 if (!bidi_initialized)
1074 bidi_initialize ();
1075
be39f003
EZ
1076 /* If we are inside a paragraph separator, we are just waiting
1077 for the separator to be exhausted; use the previous paragraph
e5a2fec7
EZ
1078 direction. But don't do that if we have been just reseated,
1079 because we need to reinitialize below in that case. */
1080 if (!bidi_it->first_elt
1081 && bidi_it->charpos < bidi_it->separator_limit)
be39f003
EZ
1082 return;
1083
b44d9321 1084 /* If we are on a newline, get past it to where the next
5e65aec0
EZ
1085 paragraph might start. But don't do that at BEGV since then
1086 we are potentially in a new paragraph that doesn't yet
1087 exist. */
c143c213 1088 pos = bidi_it->charpos;
9f257352
EZ
1089 s = STRINGP (bidi_it->string.lstring) ?
1090 SDATA (bidi_it->string.lstring) : bidi_it->string.s;
f3014ef5
EZ
1091 if (bytepos > begbyte
1092 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
be39f003 1093 {
b44d9321 1094 bytepos++;
c143c213 1095 pos++;
be39f003 1096 }
b44d9321
EZ
1097
1098 /* We are either at the beginning of a paragraph or in the
1099 middle of it. Find where this paragraph starts. */
87e67904
EZ
1100 if (string_p)
1101 {
1102 /* We don't support changes of paragraph direction inside a
1103 string. It is treated as a single paragraph. */
1104 pstartbyte = 0;
1105 }
1106 else
1107 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
be39f003
EZ
1108 bidi_it->separator_limit = -1;
1109 bidi_it->new_paragraph = 0;
bea4f10c
EZ
1110
1111 /* The following loop is run more than once only if NO_DEFAULT_P
87e67904 1112 is non-zero, and only if we are iterating on a buffer. */
bea4f10c
EZ
1113 do {
1114 bytepos = pstartbyte;
87e67904
EZ
1115 if (!string_p)
1116 pos = BYTE_TO_CHAR (bytepos);
1117 ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
1118 bidi_it->frame_window_p, &ch_len, &nchars);
bea4f10c
EZ
1119 type = bidi_get_type (ch, NEUTRAL_DIR);
1120
182ce2d2 1121 for (pos += nchars, bytepos += ch_len;
bea4f10c
EZ
1122 /* NOTE: UAX#9 says to search only for L, AL, or R types
1123 of characters, and ignore RLE, RLO, LRE, and LRO.
1124 However, I'm not sure it makes sense to omit those 4;
1125 should try with and without that to see the effect. */
1126 (bidi_get_category (type) != STRONG)
1127 || (bidi_ignore_explicit_marks_for_paragraph_level
1128 && (type == RLE || type == RLO
1129 || type == LRE || type == LRO));
1130 type = bidi_get_type (ch, NEUTRAL_DIR))
1131 {
87e67904
EZ
1132 if (!string_p
1133 && type == NEUTRAL_B
1134 && bidi_at_paragraph_end (pos, bytepos) >= -1)
21fce5ab 1135 break;
87e67904 1136 if (pos >= end)
bea4f10c
EZ
1137 {
1138 /* Pretend there's a paragraph separator at end of
87e67904 1139 buffer/string. */
bea4f10c
EZ
1140 type = NEUTRAL_B;
1141 break;
1142 }
102ebb00 1143 /* Fetch next character and advance to get past it. */
87e67904 1144 ch = bidi_fetch_char (bytepos, pos, &disp_pos, &bidi_it->string,
fc6f18ce 1145 bidi_it->frame_window_p, &ch_len, &nchars);
182ce2d2
EZ
1146 pos += nchars;
1147 bytepos += ch_len;
bea4f10c
EZ
1148 }
1149 if (type == STRONG_R || type == STRONG_AL) /* P3 */
1150 bidi_it->paragraph_dir = R2L;
1151 else if (type == STRONG_L)
1152 bidi_it->paragraph_dir = L2R;
87e67904
EZ
1153 if (!string_p
1154 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
bea4f10c
EZ
1155 {
1156 /* If this paragraph is at BEGV, default to L2R. */
1157 if (pstartbyte == BEGV_BYTE)
1158 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1159 else
1160 {
1161 EMACS_INT prevpbyte = pstartbyte;
1162 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1163
1164 /* Find the beginning of the previous paragraph, if any. */
1165 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1166 {
182ce2d2
EZ
1167 /* FXIME: What if p is covered by a display
1168 string? See also a FIXME inside
1169 bidi_find_paragraph_start. */
bea4f10c
EZ
1170 p--;
1171 pbyte = CHAR_TO_BYTE (p);
1172 prevpbyte = bidi_find_paragraph_start (p, pbyte);
1173 }
1174 pstartbyte = prevpbyte;
1175 }
1176 }
87e67904
EZ
1177 } while (!string_p
1178 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
b7b65b15 1179 }
be39f003
EZ
1180 else
1181 abort ();
b7b65b15 1182
b44d9321
EZ
1183 /* Contrary to UAX#9 clause P3, we only default the paragraph
1184 direction to L2R if we have no previous usable paragraph
bea4f10c 1185 direction. This is allowed by the HL1 clause. */
d20e1419 1186 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
bea4f10c 1187 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
be39f003 1188 if (bidi_it->paragraph_dir == R2L)
b44d9321 1189 bidi_it->level_stack[0].level = 1;
be39f003 1190 else
b44d9321 1191 bidi_it->level_stack[0].level = 0;
be39f003
EZ
1192
1193 bidi_line_init (bidi_it);
b7b65b15
EZ
1194}
1195
cbb09f04
EZ
1196\f
1197/***********************************************************************
1198 Resolving explicit and implicit levels.
58b9f433 1199 The rest of this file constitutes the core of the UBA implementation.
cbb09f04 1200 ***********************************************************************/
b7b65b15 1201
58b9f433 1202static inline int
182ce2d2 1203bidi_explicit_dir_char (int ch)
b7b65b15 1204{
182ce2d2
EZ
1205 bidi_type_t ch_type;
1206
1207 if (!bidi_initialized)
1208 abort ();
1209 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1210 return (ch_type == LRE || ch_type == LRO
1211 || ch_type == RLE || ch_type == RLO
1212 || ch_type == PDF);
b7b65b15
EZ
1213}
1214
1215/* A helper function for bidi_resolve_explicit. It advances to the
1216 next character in logical order and determines the new embedding
1217 level and directional override, but does not take into account
1218 empty embeddings. */
1219static int
1220bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1221{
1222 int curchar;
1223 bidi_type_t type;
1224 int current_level;
1225 int new_level;
1226 bidi_dir_t override;
9f257352 1227 int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
b7b65b15 1228
182ce2d2
EZ
1229 /* If reseat()'ed, don't advance, so as to start iteration from the
1230 position where we were reseated. bidi_it->bytepos can be less
1231 than BEGV_BYTE after reseat to BEGV. */
87e67904 1232 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
9c82e145 1233 || bidi_it->first_elt)
e7402cb2 1234 {
9c82e145 1235 bidi_it->first_elt = 0;
87e67904
EZ
1236 if (string_p)
1237 {
9f257352
EZ
1238 const unsigned char *p =
1239 STRINGP (bidi_it->string.lstring)
1240 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1241
87e67904
EZ
1242 if (bidi_it->charpos < 0)
1243 bidi_it->charpos = 0;
f3014ef5
EZ
1244 bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1245 bidi_it->string.unibyte);
87e67904
EZ
1246 }
1247 else
1248 {
1249 if (bidi_it->charpos < BEGV)
1250 bidi_it->charpos = BEGV;
1251 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1252 }
e7402cb2 1253 }
87e67904
EZ
1254 /* Don't move at end of buffer/string. */
1255 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
b7b65b15 1256 {
182ce2d2
EZ
1257 /* Advance to the next character, skipping characters covered by
1258 display strings (nchars > 1). */
102ebb00
EZ
1259 if (bidi_it->nchars <= 0)
1260 abort ();
182ce2d2 1261 bidi_it->charpos += bidi_it->nchars;
e7402cb2
EZ
1262 if (bidi_it->ch_len == 0)
1263 abort ();
b7b65b15
EZ
1264 bidi_it->bytepos += bidi_it->ch_len;
1265 }
1266
1267 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1268 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1269 new_level = current_level;
1270
87e67904 1271 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
e7402cb2
EZ
1272 {
1273 curchar = BIDI_EOB;
1274 bidi_it->ch_len = 1;
182ce2d2 1275 bidi_it->nchars = 1;
87e67904 1276 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
e7402cb2
EZ
1277 }
1278 else
1279 {
182ce2d2
EZ
1280 /* Fetch the character at BYTEPOS. If it is covered by a
1281 display string, treat the entire run of covered characters as
1282 a single character u+FFFC. */
102ebb00 1283 curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
87e67904
EZ
1284 &bidi_it->disp_pos, &bidi_it->string,
1285 bidi_it->frame_window_p,
102ebb00 1286 &bidi_it->ch_len, &bidi_it->nchars);
e7402cb2 1287 }
b7b65b15 1288 bidi_it->ch = curchar;
b7b65b15 1289
6bff6497
EZ
1290 /* Don't apply directional override here, as all the types we handle
1291 below will not be affected by the override anyway, and we need
1292 the original type unaltered. The override will be applied in
1293 bidi_resolve_weak. */
1294 type = bidi_get_type (curchar, NEUTRAL_DIR);
89d3374a
EZ
1295 bidi_it->orig_type = type;
1296 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
1297
1298 if (type != PDF)
1299 bidi_it->prev_was_pdf = 0;
1300
89d3374a 1301 bidi_it->type_after_w1 = UNKNOWN_BT;
b7b65b15
EZ
1302
1303 switch (type)
1304 {
1305 case RLE: /* X2 */
1306 case RLO: /* X4 */
89d3374a
EZ
1307 bidi_it->type_after_w1 = type;
1308 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 1309 type = WEAK_BN; /* X9/Retaining */
87e67904 1310 if (bidi_it->ignore_bn_limit <= -1)
b7b65b15
EZ
1311 {
1312 if (current_level <= BIDI_MAXLEVEL - 4)
1313 {
1314 /* Compute the least odd embedding level greater than
1315 the current level. */
1316 new_level = ((current_level + 1) & ~1) + 1;
89d3374a 1317 if (bidi_it->type_after_w1 == RLE)
b7b65b15
EZ
1318 override = NEUTRAL_DIR;
1319 else
1320 override = R2L;
1321 if (current_level == BIDI_MAXLEVEL - 4)
1322 bidi_it->invalid_rl_levels = 0;
1323 bidi_push_embedding_level (bidi_it, new_level, override);
1324 }
1325 else
1326 {
1327 bidi_it->invalid_levels++;
1328 /* See the commentary about invalid_rl_levels below. */
1329 if (bidi_it->invalid_rl_levels < 0)
1330 bidi_it->invalid_rl_levels = 0;
1331 bidi_it->invalid_rl_levels++;
1332 }
1333 }
89d3374a 1334 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
1335 || bidi_it->next_en_pos > bidi_it->charpos)
1336 type = WEAK_EN;
1337 break;
1338 case LRE: /* X3 */
1339 case LRO: /* X5 */
89d3374a
EZ
1340 bidi_it->type_after_w1 = type;
1341 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 1342 type = WEAK_BN; /* X9/Retaining */
87e67904 1343 if (bidi_it->ignore_bn_limit <= -1)
b7b65b15
EZ
1344 {
1345 if (current_level <= BIDI_MAXLEVEL - 5)
1346 {
1347 /* Compute the least even embedding level greater than
1348 the current level. */
1349 new_level = ((current_level + 2) & ~1);
89d3374a 1350 if (bidi_it->type_after_w1 == LRE)
b7b65b15
EZ
1351 override = NEUTRAL_DIR;
1352 else
1353 override = L2R;
1354 bidi_push_embedding_level (bidi_it, new_level, override);
1355 }
1356 else
1357 {
1358 bidi_it->invalid_levels++;
1359 /* invalid_rl_levels counts invalid levels encountered
1360 while the embedding level was already too high for
1361 LRE/LRO, but not for RLE/RLO. That is because
1362 there may be exactly one PDF which we should not
1363 ignore even though invalid_levels is non-zero.
1364 invalid_rl_levels helps to know what PDF is
1365 that. */
1366 if (bidi_it->invalid_rl_levels >= 0)
1367 bidi_it->invalid_rl_levels++;
1368 }
1369 }
89d3374a 1370 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
1371 || bidi_it->next_en_pos > bidi_it->charpos)
1372 type = WEAK_EN;
1373 break;
1374 case PDF: /* X7 */
89d3374a
EZ
1375 bidi_it->type_after_w1 = type;
1376 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 1377 type = WEAK_BN; /* X9/Retaining */
87e67904 1378 if (bidi_it->ignore_bn_limit <= -1)
b7b65b15
EZ
1379 {
1380 if (!bidi_it->invalid_rl_levels)
1381 {
1382 new_level = bidi_pop_embedding_level (bidi_it);
1383 bidi_it->invalid_rl_levels = -1;
1384 if (bidi_it->invalid_levels)
1385 bidi_it->invalid_levels--;
1386 /* else nothing: UAX#9 says to ignore invalid PDFs */
1387 }
1388 if (!bidi_it->invalid_levels)
1389 new_level = bidi_pop_embedding_level (bidi_it);
1390 else
1391 {
1392 bidi_it->invalid_levels--;
1393 bidi_it->invalid_rl_levels--;
1394 }
1395 }
89d3374a 1396 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
1397 || bidi_it->next_en_pos > bidi_it->charpos)
1398 type = WEAK_EN;
1399 break;
1400 default:
1401 /* Nothing. */
1402 break;
1403 }
1404
1405 bidi_it->type = type;
2d6e4628 1406 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1407
1408 return new_level;
1409}
1410
1411/* Given an iterator state in BIDI_IT, advance one character position
182ce2d2
EZ
1412 in the buffer/string to the next character (in the logical order),
1413 resolve any explicit embeddings and directional overrides, and
1414 return the embedding level of the character after resolving
1415 explicit directives and ignoring empty embeddings. */
b7b65b15
EZ
1416static int
1417bidi_resolve_explicit (struct bidi_it *bidi_it)
1418{
1419 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1420 int new_level = bidi_resolve_explicit_1 (bidi_it);
87e67904 1421 EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
9f257352
EZ
1422 const unsigned char *s = STRINGP (bidi_it->string.lstring)
1423 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
b7b65b15
EZ
1424
1425 if (prev_level < new_level
1426 && bidi_it->type == WEAK_BN
87e67904
EZ
1427 && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1428 && bidi_it->charpos < eob /* not already at EOB */
1429 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
f3014ef5
EZ
1430 + bidi_it->ch_len, s,
1431 bidi_it->string.unibyte)))
b7b65b15
EZ
1432 {
1433 /* Avoid pushing and popping embedding levels if the level run
1434 is empty, as this breaks level runs where it shouldn't.
1435 UAX#9 removes all the explicit embedding and override codes,
1436 so empty embeddings disappear without a trace. We need to
1437 behave as if we did the same. */
1438 struct bidi_it saved_it;
1439 int level = prev_level;
1440
1441 bidi_copy_it (&saved_it, bidi_it);
1442
87e67904 1443 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
f3014ef5
EZ
1444 + bidi_it->ch_len, s,
1445 bidi_it->string.unibyte)))
b7b65b15 1446 {
182ce2d2
EZ
1447 /* This advances to the next character, skipping any
1448 characters covered by display strings. */
b7b65b15 1449 level = bidi_resolve_explicit_1 (bidi_it);
9f257352
EZ
1450 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1451 a pointer to its data is no longer valid. */
1452 if (STRINGP (bidi_it->string.lstring))
1453 s = SDATA (bidi_it->string.lstring);
b7b65b15
EZ
1454 }
1455
102ebb00
EZ
1456 if (bidi_it->nchars <= 0)
1457 abort ();
b7b65b15 1458 if (level == prev_level) /* empty embedding */
182ce2d2 1459 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
b7b65b15 1460 else /* this embedding is non-empty */
87e67904 1461 saved_it.ignore_bn_limit = -2;
b7b65b15
EZ
1462
1463 bidi_copy_it (bidi_it, &saved_it);
87e67904 1464 if (bidi_it->ignore_bn_limit > -1)
b7b65b15
EZ
1465 {
1466 /* We pushed a level, but we shouldn't have. Undo that. */
1467 if (!bidi_it->invalid_rl_levels)
1468 {
1469 new_level = bidi_pop_embedding_level (bidi_it);
1470 bidi_it->invalid_rl_levels = -1;
1471 if (bidi_it->invalid_levels)
1472 bidi_it->invalid_levels--;
1473 }
1474 if (!bidi_it->invalid_levels)
1475 new_level = bidi_pop_embedding_level (bidi_it);
1476 else
1477 {
1478 bidi_it->invalid_levels--;
1479 bidi_it->invalid_rl_levels--;
1480 }
1481 }
1482 }
1483
b7b65b15
EZ
1484 if (bidi_it->type == NEUTRAL_B) /* X8 */
1485 {
21fce5ab 1486 bidi_set_paragraph_end (bidi_it);
6bff6497
EZ
1487 /* This is needed by bidi_resolve_weak below, and in L1. */
1488 bidi_it->type_after_w1 = bidi_it->type;
89d3374a 1489 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1490 }
1491
1492 return new_level;
1493}
1494
182ce2d2
EZ
1495/* Advance in the buffer/string, resolve weak types and return the
1496 type of the next character after weak type resolution. */
fd3998ff 1497static bidi_type_t
b7b65b15
EZ
1498bidi_resolve_weak (struct bidi_it *bidi_it)
1499{
1500 bidi_type_t type;
1501 bidi_dir_t override;
1502 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1503 int new_level = bidi_resolve_explicit (bidi_it);
1504 int next_char;
1505 bidi_type_t type_of_next;
1506 struct bidi_it saved_it;
9f257352
EZ
1507 EMACS_INT eob =
1508 (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1509 ? bidi_it->string.schars : ZV;
b7b65b15
EZ
1510
1511 type = bidi_it->type;
1512 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1513
1514 if (type == UNKNOWN_BT
1515 || type == LRE
1516 || type == LRO
1517 || type == RLE
1518 || type == RLO
1519 || type == PDF)
1520 abort ();
1521
1522 if (new_level != prev_level
1523 || bidi_it->type == NEUTRAL_B)
1524 {
1525 /* We've got a new embedding level run, compute the directional
1526 type of sor and initialize per-run variables (UAX#9, clause
1527 X10). */
1528 bidi_set_sor_type (bidi_it, prev_level, new_level);
1529 }
1530 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1531 || type == WEAK_BN || type == STRONG_AL)
89d3374a
EZ
1532 bidi_it->type_after_w1 = type; /* needed in L1 */
1533 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1534
1535 /* Level and directional override status are already recorded in
1536 bidi_it, and do not need any change; see X6. */
1537 if (override == R2L) /* X6 */
1538 type = STRONG_R;
1539 else if (override == L2R)
1540 type = STRONG_L;
bc5a45f3 1541 else
b7b65b15 1542 {
bc5a45f3 1543 if (type == WEAK_NSM) /* W1 */
b7b65b15 1544 {
bc5a45f3 1545 /* Note that we don't need to consider the case where the
5930fe97
EZ
1546 prev character has its type overridden by an RLO or LRO,
1547 because then either the type of this NSM would have been
1548 also overridden, or the previous character is outside the
1549 current level run, and thus not relevant to this NSM.
1550 This is why NSM gets the type_after_w1 of the previous
1551 character. */
ebb5722e
EZ
1552 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1553 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1554 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
5930fe97 1555 type = bidi_it->prev.type_after_w1;
bc5a45f3
EZ
1556 else if (bidi_it->sor == R2L)
1557 type = STRONG_R;
1558 else if (bidi_it->sor == L2R)
1559 type = STRONG_L;
1560 else /* shouldn't happen! */
1561 abort ();
b7b65b15 1562 }
bc5a45f3
EZ
1563 if (type == WEAK_EN /* W2 */
1564 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1565 type = WEAK_AN;
1566 else if (type == STRONG_AL) /* W3 */
1567 type = STRONG_R;
1568 else if ((type == WEAK_ES /* W4 */
1569 && bidi_it->prev.type_after_w1 == WEAK_EN
1570 && bidi_it->prev.orig_type == WEAK_EN)
1571 || (type == WEAK_CS
1572 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1573 && bidi_it->prev.orig_type == WEAK_EN)
1574 || bidi_it->prev.type_after_w1 == WEAK_AN)))
b7b65b15 1575 {
9f257352
EZ
1576 const unsigned char *s =
1577 STRINGP (bidi_it->string.lstring)
1578 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1579
e7402cb2 1580 next_char =
87e67904
EZ
1581 bidi_it->charpos + bidi_it->nchars >= eob
1582 ? BIDI_EOB
f3014ef5
EZ
1583 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1584 bidi_it->string.unibyte);
6bff6497 1585 type_of_next = bidi_get_type (next_char, override);
b7b65b15 1586
bc5a45f3 1587 if (type_of_next == WEAK_BN
b7b65b15
EZ
1588 || bidi_explicit_dir_char (next_char))
1589 {
1590 bidi_copy_it (&saved_it, bidi_it);
1591 while (bidi_resolve_explicit (bidi_it) == new_level
bc5a45f3 1592 && bidi_it->type == WEAK_BN)
b7b65b15
EZ
1593 ;
1594 type_of_next = bidi_it->type;
b7b65b15
EZ
1595 bidi_copy_it (bidi_it, &saved_it);
1596 }
bc5a45f3
EZ
1597
1598 /* If the next character is EN, but the last strong-type
1599 character is AL, that next EN will be changed to AN when
1600 we process it in W2 above. So in that case, this ES
1601 should not be changed into EN. */
1602 if (type == WEAK_ES
1603 && type_of_next == WEAK_EN
1604 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1605 type = WEAK_EN;
1606 else if (type == WEAK_CS)
b7b65b15 1607 {
bc5a45f3
EZ
1608 if (bidi_it->prev.type_after_w1 == WEAK_AN
1609 && (type_of_next == WEAK_AN
1610 /* If the next character is EN, but the last
1611 strong-type character is AL, EN will be later
1612 changed to AN when we process it in W2 above.
1613 So in that case, this ES should not be
1614 changed into EN. */
1615 || (type_of_next == WEAK_EN
1616 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1617 type = WEAK_AN;
1618 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1619 && type_of_next == WEAK_EN
1620 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1621 type = WEAK_EN;
1622 }
1623 }
1624 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1625 || type == WEAK_BN) /* W5/Retaining */
1626 {
1627 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1628 || bidi_it->next_en_pos > bidi_it->charpos)
1629 type = WEAK_EN;
1630 else /* W5: ET/BN with EN after it. */
1631 {
182ce2d2 1632 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
9f257352
EZ
1633 const unsigned char *s =
1634 STRINGP (bidi_it->string.lstring)
1635 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
bc5a45f3 1636
102ebb00
EZ
1637 if (bidi_it->nchars <= 0)
1638 abort ();
bc5a45f3 1639 next_char =
87e67904
EZ
1640 bidi_it->charpos + bidi_it->nchars >= eob
1641 ? BIDI_EOB
f3014ef5
EZ
1642 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1643 bidi_it->string.unibyte);
bc5a45f3
EZ
1644 type_of_next = bidi_get_type (next_char, override);
1645
1646 if (type_of_next == WEAK_ET
1647 || type_of_next == WEAK_BN
1648 || bidi_explicit_dir_char (next_char))
1649 {
1650 bidi_copy_it (&saved_it, bidi_it);
1651 while (bidi_resolve_explicit (bidi_it) == new_level
1652 && (bidi_it->type == WEAK_BN
1653 || bidi_it->type == WEAK_ET))
1654 ;
1655 type_of_next = bidi_it->type;
1656 en_pos = bidi_it->charpos;
1657 bidi_copy_it (bidi_it, &saved_it);
1658 }
1659 if (type_of_next == WEAK_EN)
b7b65b15 1660 {
bc5a45f3
EZ
1661 /* If the last strong character is AL, the EN we've
1662 found will become AN when we get to it (W2). */
1663 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1664 {
1665 type = WEAK_EN;
1666 /* Remember this EN position, to speed up processing
1667 of the next ETs. */
1668 bidi_it->next_en_pos = en_pos;
1669 }
1670 else if (type == WEAK_BN)
1671 type = NEUTRAL_ON; /* W6/Retaining */
b7b65b15 1672 }
b7b65b15
EZ
1673 }
1674 }
1675 }
1676
1677 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
89d3374a
EZ
1678 || (type == WEAK_BN
1679 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1680 || bidi_it->prev.type_after_w1 == WEAK_ES
1681 || bidi_it->prev.type_after_w1 == WEAK_ET)))
b7b65b15
EZ
1682 type = NEUTRAL_ON;
1683
1684 /* Store the type we've got so far, before we clobber it with strong
1685 types in W7 and while resolving neutral types. But leave alone
1686 the original types that were recorded above, because we will need
1687 them for the L1 clause. */
89d3374a
EZ
1688 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1689 bidi_it->type_after_w1 = type;
1690 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1691
1692 if (type == WEAK_EN) /* W7 */
1693 {
89d3374a 1694 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
b7b65b15
EZ
1695 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1696 type = STRONG_L;
1697 }
1698
1699 bidi_it->type = type;
2d6e4628 1700 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1701 return type;
1702}
1703
cbb09f04
EZ
1704/* Resolve the type of a neutral character according to the type of
1705 surrounding strong text and the current embedding level. */
58b9f433 1706static inline bidi_type_t
cbb09f04
EZ
1707bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1708{
1709 /* N1: European and Arabic numbers are treated as though they were R. */
1710 if (next_type == WEAK_EN || next_type == WEAK_AN)
1711 next_type = STRONG_R;
1712 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1713 prev_type = STRONG_R;
1714
1715 if (next_type == prev_type) /* N1 */
1716 return next_type;
1717 else if ((lev & 1) == 0) /* N2 */
1718 return STRONG_L;
1719 else
1720 return STRONG_R;
1721}
1722
fd3998ff 1723static bidi_type_t
b7b65b15
EZ
1724bidi_resolve_neutral (struct bidi_it *bidi_it)
1725{
1726 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1727 bidi_type_t type = bidi_resolve_weak (bidi_it);
1728 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1729
1730 if (!(type == STRONG_R
1731 || type == STRONG_L
1732 || type == WEAK_BN
1733 || type == WEAK_EN
1734 || type == WEAK_AN
1735 || type == NEUTRAL_B
1736 || type == NEUTRAL_S
1737 || type == NEUTRAL_WS
1738 || type == NEUTRAL_ON))
1739 abort ();
1740
1741 if (bidi_get_category (type) == NEUTRAL
1742 || (type == WEAK_BN && prev_level == current_level))
1743 {
1744 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1745 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1746 bidi_it->next_for_neutral.type,
1747 current_level);
1748 else
1749 {
1750 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1751 the assumption of batch-style processing; see clauses W4,
1752 W5, and especially N1, which require to look far forward
182ce2d2
EZ
1753 (as well as back) in the buffer/string. May the fleas of
1754 a thousand camels infest the armpits of those who design
b7b65b15
EZ
1755 supposedly general-purpose algorithms by looking at their
1756 own implementations, and fail to consider other possible
1757 implementations! */
1758 struct bidi_it saved_it;
1759 bidi_type_t next_type;
1760
1761 if (bidi_it->scan_dir == -1)
1762 abort ();
1763
1764 bidi_copy_it (&saved_it, bidi_it);
1765 /* Scan the text forward until we find the first non-neutral
1766 character, and then use that to resolve the neutral we
1767 are dealing with now. We also cache the scanned iterator
1768 states, to salvage some of the effort later. */
1769 bidi_cache_iterator_state (bidi_it, 0);
1770 do {
1771 /* Record the info about the previous character, so that
1772 it will be cached below with this state. */
89d3374a 1773 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1774 && bidi_it->type != WEAK_BN)
1775 bidi_remember_char (&bidi_it->prev, bidi_it);
1776 type = bidi_resolve_weak (bidi_it);
1777 /* Paragraph separators have their levels fully resolved
1778 at this point, so cache them as resolved. */
1779 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1780 /* FIXME: implement L1 here, by testing for a newline and
1781 resetting the level for any sequence of whitespace
1782 characters adjacent to it. */
1783 } while (!(type == NEUTRAL_B
1784 || (type != WEAK_BN
1785 && bidi_get_category (type) != NEUTRAL)
1786 /* This is all per level run, so stop when we
1787 reach the end of this level run. */
1788 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1789 current_level));
1790
1791 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1792
1793 switch (type)
1794 {
1795 case STRONG_L:
1796 case STRONG_R:
1797 case STRONG_AL:
1798 next_type = type;
1799 break;
1800 case WEAK_EN:
1801 case WEAK_AN:
1802 /* N1: ``European and Arabic numbers are treated as
1803 though they were R.'' */
1804 next_type = STRONG_R;
1805 saved_it.next_for_neutral.type = STRONG_R;
1806 break;
1807 case WEAK_BN:
1808 if (!bidi_explicit_dir_char (bidi_it->ch))
1809 abort (); /* can't happen: BNs are skipped */
1810 /* FALLTHROUGH */
1811 case NEUTRAL_B:
1812 /* Marched all the way to the end of this level run.
1813 We need to use the eor type, whose information is
1814 stored by bidi_set_sor_type in the prev_for_neutral
1815 member. */
1816 if (saved_it.type != WEAK_BN
89d3374a 1817 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
b7b65b15
EZ
1818 {
1819 next_type = bidi_it->prev_for_neutral.type;
1820 saved_it.next_for_neutral.type = next_type;
2d6e4628 1821 bidi_check_type (next_type);
b7b65b15
EZ
1822 }
1823 else
1824 {
1825 /* This is a BN which does not adjoin neutrals.
1826 Leave its type alone. */
1827 bidi_copy_it (bidi_it, &saved_it);
1828 return bidi_it->type;
1829 }
1830 break;
1831 default:
1832 abort ();
1833 }
1834 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1835 next_type, current_level);
1836 saved_it.type = type;
2d6e4628 1837 bidi_check_type (type);
b7b65b15
EZ
1838 bidi_copy_it (bidi_it, &saved_it);
1839 }
1840 }
1841 return type;
1842}
1843
1844/* Given an iterator state in BIDI_IT, advance one character position
182ce2d2
EZ
1845 in the buffer/string to the next character (in the logical order),
1846 resolve the bidi type of that next character, and return that
1847 type. */
fd3998ff 1848static bidi_type_t
b7b65b15
EZ
1849bidi_type_of_next_char (struct bidi_it *bidi_it)
1850{
1851 bidi_type_t type;
1852
1853 /* This should always be called during a forward scan. */
1854 if (bidi_it->scan_dir != 1)
1855 abort ();
1856
1857 /* Reset the limit until which to ignore BNs if we step out of the
1858 area where we found only empty levels. */
87e67904 1859 if ((bidi_it->ignore_bn_limit > -1
b7b65b15 1860 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
87e67904 1861 || (bidi_it->ignore_bn_limit == -2
b7b65b15 1862 && !bidi_explicit_dir_char (bidi_it->ch)))
87e67904 1863 bidi_it->ignore_bn_limit = -1;
b7b65b15
EZ
1864
1865 type = bidi_resolve_neutral (bidi_it);
1866
1867 return type;
1868}
1869
1870/* Given an iterator state BIDI_IT, advance one character position in
182ce2d2
EZ
1871 the buffer/string to the next character (in the current scan
1872 direction), resolve the embedding and implicit levels of that next
1873 character, and return the resulting level. */
fd3998ff 1874static int
b7b65b15
EZ
1875bidi_level_of_next_char (struct bidi_it *bidi_it)
1876{
1877 bidi_type_t type;
1878 int level, prev_level = -1;
1879 struct bidi_saved_info next_for_neutral;
578b494e 1880 EMACS_INT next_char_pos = -2;
b7b65b15
EZ
1881
1882 if (bidi_it->scan_dir == 1)
1883 {
9f257352
EZ
1884 EMACS_INT eob =
1885 (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1886 ? bidi_it->string.schars : ZV;
1887
b7b65b15 1888 /* There's no sense in trying to advance if we hit end of text. */
9f257352 1889 if (bidi_it->charpos >= eob)
b7b65b15
EZ
1890 return bidi_it->resolved_level;
1891
1892 /* Record the info about the previous character. */
89d3374a 1893 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1894 && bidi_it->type != WEAK_BN)
1895 bidi_remember_char (&bidi_it->prev, bidi_it);
89d3374a
EZ
1896 if (bidi_it->type_after_w1 == STRONG_R
1897 || bidi_it->type_after_w1 == STRONG_L
1898 || bidi_it->type_after_w1 == STRONG_AL)
b7b65b15
EZ
1899 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1900 /* FIXME: it sounds like we don't need both prev and
1901 prev_for_neutral members, but I'm leaving them both for now. */
1902 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1903 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1904 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1905
1906 /* If we overstepped the characters used for resolving neutrals
1907 and whitespace, invalidate their info in the iterator. */
1908 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1909 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1910 if (bidi_it->next_en_pos >= 0
1911 && bidi_it->charpos >= bidi_it->next_en_pos)
1912 bidi_it->next_en_pos = -1;
1913 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1914 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1915 bidi_it->next_for_ws.type = UNKNOWN_BT;
1916
1917 /* This must be taken before we fill the iterator with the info
1918 about the next char. If we scan backwards, the iterator
1919 state must be already cached, so there's no need to know the
1920 embedding level of the previous character, since we will be
1921 returning to our caller shortly. */
1922 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1923 }
1924 next_for_neutral = bidi_it->next_for_neutral;
1925
182ce2d2
EZ
1926 /* Perhaps the character we want is already cached. If it is, the
1927 call to bidi_cache_find below will return a type other than
1928 UNKNOWN_BT. */
87e67904 1929 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
102ebb00 1930 {
9f257352
EZ
1931 int bob =
1932 (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
1933
102ebb00
EZ
1934 if (bidi_it->scan_dir > 0)
1935 {
1936 if (bidi_it->nchars <= 0)
1937 abort ();
1938 next_char_pos = bidi_it->charpos + bidi_it->nchars;
1939 }
9f257352 1940 else if (bidi_it->charpos >= bob)
bb269206
EZ
1941 /* Implementation note: we allow next_char_pos to be as low as
1942 0 for buffers or -1 for strings, and that is okay because
1943 that's the "position" of the sentinel iterator state we
1944 cached at the beginning of the iteration. */
102ebb00 1945 next_char_pos = bidi_it->charpos - 1;
578b494e 1946 if (next_char_pos >= bob - 1)
87e67904
EZ
1947 type = bidi_cache_find (next_char_pos, -1, bidi_it);
1948 else
1949 type = UNKNOWN_BT;
102ebb00 1950 }
182ce2d2 1951 else
102ebb00 1952 type = UNKNOWN_BT;
b7b65b15
EZ
1953 if (type != UNKNOWN_BT)
1954 {
1955 /* Don't lose the information for resolving neutrals! The
1956 cached states could have been cached before their
1957 next_for_neutral member was computed. If we are on our way
1958 forward, we can simply take the info from the previous
1959 state. */
1960 if (bidi_it->scan_dir == 1
1961 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1962 bidi_it->next_for_neutral = next_for_neutral;
1963
1964 /* If resolved_level is -1, it means this state was cached
1965 before it was completely resolved, so we cannot return
1966 it. */
1967 if (bidi_it->resolved_level != -1)
1968 return bidi_it->resolved_level;
1969 }
1970 if (bidi_it->scan_dir == -1)
1971 /* If we are going backwards, the iterator state is already cached
1972 from previous scans, and should be fully resolved. */
1973 abort ();
1974
1975 if (type == UNKNOWN_BT)
1976 type = bidi_type_of_next_char (bidi_it);
1977
1978 if (type == NEUTRAL_B)
1979 return bidi_it->resolved_level;
1980
1981 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1982 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1983 || (type == WEAK_BN && prev_level == level))
1984 {
1985 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1986 abort ();
1987
1988 /* If the cached state shows a neutral character, it was not
1989 resolved by bidi_resolve_neutral, so do it now. */
1990 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1991 bidi_it->next_for_neutral.type,
1992 level);
1993 }
1994
1995 if (!(type == STRONG_R
1996 || type == STRONG_L
1997 || type == WEAK_BN
1998 || type == WEAK_EN
1999 || type == WEAK_AN))
2000 abort ();
2001 bidi_it->type = type;
2d6e4628 2002 bidi_check_type (bidi_it->type);
b7b65b15
EZ
2003
2004 /* For L1 below, we need to know, for each WS character, whether
0d327994 2005 it belongs to a sequence of WS characters preceding a newline
b7b65b15 2006 or a TAB or a paragraph separator. */
89d3374a 2007 if (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
2008 && bidi_it->next_for_ws.type == UNKNOWN_BT)
2009 {
2010 int ch;
9d68c2a9 2011 EMACS_INT clen = bidi_it->ch_len;
6bff6497
EZ
2012 EMACS_INT bpos = bidi_it->bytepos;
2013 EMACS_INT cpos = bidi_it->charpos;
182ce2d2 2014 EMACS_INT disp_pos = bidi_it->disp_pos;
102ebb00 2015 EMACS_INT nc = bidi_it->nchars;
87e67904 2016 struct bidi_string_data bs = bidi_it->string;
b7b65b15 2017 bidi_type_t chtype;
fc6f18ce 2018 int fwp = bidi_it->frame_window_p;
b7b65b15 2019
102ebb00
EZ
2020 if (bidi_it->nchars <= 0)
2021 abort ();
b7b65b15 2022 do {
87e67904 2023 ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &bs, fwp,
fc6f18ce 2024 &clen, &nc);
e7402cb2 2025 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
b7b65b15
EZ
2026 chtype = NEUTRAL_B;
2027 else
6bff6497 2028 chtype = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15
EZ
2029 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2030 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2031 bidi_it->next_for_ws.type = chtype;
2d6e4628 2032 bidi_check_type (bidi_it->next_for_ws.type);
b7b65b15
EZ
2033 bidi_it->next_for_ws.charpos = cpos;
2034 bidi_it->next_for_ws.bytepos = bpos;
2035 }
2036
2037 /* Resolve implicit levels, with a twist: PDFs get the embedding
2038 level of the enbedding they terminate. See below for the
2039 reason. */
89d3374a 2040 if (bidi_it->orig_type == PDF
b7b65b15
EZ
2041 /* Don't do this if this formatting code didn't change the
2042 embedding level due to invalid or empty embeddings. */
2043 && prev_level != level)
2044 {
2045 /* Don't look in UAX#9 for the reason for this: it's our own
2046 private quirk. The reason is that we want the formatting
2047 codes to be delivered so that they bracket the text of their
2048 embedding. For example, given the text
2049
2050 {RLO}teST{PDF}
2051
2052 we want it to be displayed as
2053
0d68907d 2054 {PDF}STet{RLO}
b7b65b15
EZ
2055
2056 not as
2057
2058 STet{RLO}{PDF}
2059
2060 which will result because we bump up the embedding level as
2061 soon as we see the RLO and pop it as soon as we see the PDF,
2062 so RLO itself has the same embedding level as "teST", and
2063 thus would be normally delivered last, just before the PDF.
2064 The switch below fiddles with the level of PDF so that this
2065 ugly side effect does not happen.
2066
2067 (This is, of course, only important if the formatting codes
e7402cb2
EZ
2068 are actually displayed, but Emacs does need to display them
2069 if the user wants to.) */
b7b65b15
EZ
2070 level = prev_level;
2071 }
89d3374a
EZ
2072 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2073 || bidi_it->orig_type == NEUTRAL_S
e7402cb2
EZ
2074 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2075 /* || bidi_it->ch == LINESEP_CHAR */
89d3374a 2076 || (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
2077 && (bidi_it->next_for_ws.type == NEUTRAL_B
2078 || bidi_it->next_for_ws.type == NEUTRAL_S)))
2079 level = bidi_it->level_stack[0].level;
2080 else if ((level & 1) == 0) /* I1 */
2081 {
2082 if (type == STRONG_R)
2083 level++;
2084 else if (type == WEAK_EN || type == WEAK_AN)
2085 level += 2;
2086 }
2087 else /* I2 */
2088 {
2089 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2090 level++;
2091 }
2092
2093 bidi_it->resolved_level = level;
2094 return level;
2095}
2096
2097/* Move to the other edge of a level given by LEVEL. If END_FLAG is
2098 non-zero, we are at the end of a level, and we need to prepare to
2099 resume the scan of the lower level.
2100
2101 If this level's other edge is cached, we simply jump to it, filling
2102 the iterator structure with the iterator state on the other edge.
182ce2d2
EZ
2103 Otherwise, we walk the buffer or string until we come back to the
2104 same level as LEVEL.
b7b65b15
EZ
2105
2106 Note: we are not talking here about a ``level run'' in the UAX#9
2107 sense of the term, but rather about a ``level'' which includes
2108 all the levels higher than it. In other words, given the levels
2109 like this:
2110
2111 11111112222222333333334443343222222111111112223322111
2112 A B C
2113
2114 and assuming we are at point A scanning left to right, this
2115 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2116 at point B. */
2117static void
2118bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2119{
2120 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
2121 int idx;
2122
2123 /* Try the cache first. */
87e67904
EZ
2124 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2125 >= bidi_cache_start)
b7b65b15
EZ
2126 bidi_cache_fetch_state (idx, bidi_it);
2127 else
2128 {
2129 int new_level;
2130
2131 if (end_flag)
2132 abort (); /* if we are at end of level, its edges must be cached */
2133
2134 bidi_cache_iterator_state (bidi_it, 1);
2135 do {
2136 new_level = bidi_level_of_next_char (bidi_it);
2137 bidi_cache_iterator_state (bidi_it, 1);
2138 } while (new_level >= level);
2139 }
2140}
2141
2142void
4b292a22 2143bidi_move_to_visually_next (struct bidi_it *bidi_it)
b7b65b15
EZ
2144{
2145 int old_level, new_level, next_level;
9c82e145 2146 struct bidi_it sentinel;
acb28818 2147 struct gcpro gcpro1;
b7b65b15 2148
87e67904
EZ
2149 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2150 abort ();
2151
b7b65b15
EZ
2152 if (bidi_it->scan_dir == 0)
2153 {
2154 bidi_it->scan_dir = 1; /* default to logical order */
2155 }
2156
acb28818 2157 /* The code below can call eval, and thus cause GC. If we are
7e2ad32c 2158 iterating a Lisp string, make sure it won't be GCed. */
acb28818
EZ
2159 if (STRINGP (bidi_it->string.lstring))
2160 GCPRO1 (bidi_it->string.lstring);
2161
be39f003
EZ
2162 /* If we just passed a newline, initialize for the next line. */
2163 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
2164 bidi_line_init (bidi_it);
2165
6dcfd253
EZ
2166 /* Prepare the sentinel iterator state, and cache it. When we bump
2167 into it, scanning backwards, we'll know that the last non-base
2168 level is exhausted. */
87e67904 2169 if (bidi_cache_idx == bidi_cache_start)
9c82e145
EZ
2170 {
2171 bidi_copy_it (&sentinel, bidi_it);
2172 if (bidi_it->first_elt)
2173 {
2174 sentinel.charpos--; /* cached charpos needs to be monotonic */
2175 sentinel.bytepos--;
2176 sentinel.ch = '\n'; /* doesn't matter, but why not? */
2177 sentinel.ch_len = 1;
182ce2d2 2178 sentinel.nchars = 1;
9c82e145 2179 }
6dcfd253 2180 bidi_cache_iterator_state (&sentinel, 1);
9c82e145 2181 }
b7b65b15
EZ
2182
2183 old_level = bidi_it->resolved_level;
2184 new_level = bidi_level_of_next_char (bidi_it);
b7b65b15
EZ
2185
2186 /* Reordering of resolved levels (clause L2) is implemented by
2187 jumping to the other edge of the level and flipping direction of
c0546589 2188 scanning the text whenever we find a level change. */
b7b65b15
EZ
2189 if (new_level != old_level)
2190 {
2191 int ascending = new_level > old_level;
2192 int level_to_search = ascending ? old_level + 1 : old_level;
2193 int incr = ascending ? 1 : -1;
2194 int expected_next_level = old_level + incr;
2195
b7b65b15
EZ
2196 /* Jump (or walk) to the other edge of this level. */
2197 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2198 /* Switch scan direction and peek at the next character in the
2199 new direction. */
2200 bidi_it->scan_dir = -bidi_it->scan_dir;
2201
2202 /* The following loop handles the case where the resolved level
2203 jumps by more than one. This is typical for numbers inside a
2204 run of text with left-to-right embedding direction, but can
2205 also happen in other situations. In those cases the decision
2206 where to continue after a level change, and in what direction,
2207 is tricky. For example, given a text like below:
2208
2209 abcdefgh
2210 11336622
2211
2212 (where the numbers below the text show the resolved levels),
2213 the result of reordering according to UAX#9 should be this:
2214
2215 efdcghba
2216
2217 This is implemented by the loop below which flips direction
2218 and jumps to the other edge of the level each time it finds
2219 the new level not to be the expected one. The expected level
2220 is always one more or one less than the previous one. */
2221 next_level = bidi_peek_at_next_level (bidi_it);
2222 while (next_level != expected_next_level)
2223 {
2224 expected_next_level += incr;
2225 level_to_search += incr;
2226 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2227 bidi_it->scan_dir = -bidi_it->scan_dir;
2228 next_level = bidi_peek_at_next_level (bidi_it);
2229 }
2230
2231 /* Finally, deliver the next character in the new direction. */
2232 next_level = bidi_level_of_next_char (bidi_it);
2233 }
2234
b44d9321
EZ
2235 /* Take note when we have just processed the newline that precedes
2236 the end of the paragraph. The next time we are about to be
2237 called, set_iterator_to_next will automatically reinit the
2238 paragraph direction, if needed. We do this at the newline before
2239 the paragraph separator, because the next character might not be
2240 the first character of the next paragraph, due to the bidi
c0546589
EZ
2241 reordering, whereas we _must_ know the paragraph base direction
2242 _before_ we process the paragraph's text, since the base
2243 direction affects the reordering. */
87e67904 2244 if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
be39f003 2245 {
87e67904
EZ
2246 /* The paragraph direction of the entire string, once
2247 determined, is in effect for the entire string. Setting the
2248 separator limit to the end of the string prevents
2249 bidi_paragraph_init from being called automatically on this
2250 string. */
9f257352 2251 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
87e67904
EZ
2252 bidi_it->separator_limit = bidi_it->string.schars;
2253 else if (bidi_it->bytepos < ZV_BYTE)
be39f003 2254 {
87e67904
EZ
2255 EMACS_INT sep_len =
2256 bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2257 bidi_it->bytepos + bidi_it->ch_len);
2258 if (bidi_it->nchars <= 0)
2259 abort ();
2260 if (sep_len >= 0)
2261 {
2262 bidi_it->new_paragraph = 1;
2263 /* Record the buffer position of the last character of the
2264 paragraph separator. */
2265 bidi_it->separator_limit =
2266 bidi_it->charpos + bidi_it->nchars + sep_len;
2267 }
be39f003
EZ
2268 }
2269 }
6bff6497 2270
87e67904 2271 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
b7b65b15
EZ
2272 {
2273 /* If we are at paragraph's base embedding level and beyond the
2274 last cached position, the cache's job is done and we can
2275 discard it. */
2276 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
182ce2d2
EZ
2277 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2278 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
b7b65b15
EZ
2279 bidi_cache_reset ();
2280 /* But as long as we are caching during forward scan, we must
2281 cache each state, or else the cache integrity will be
2282 compromised: it assumes cached states correspond to buffer
2283 positions 1:1. */
2284 else
2285 bidi_cache_iterator_state (bidi_it, 1);
2286 }
acb28818
EZ
2287
2288 if (STRINGP (bidi_it->string.lstring))
2289 UNGCPRO;
b7b65b15
EZ
2290}
2291
2292/* This is meant to be called from within the debugger, whenever you
2293 wish to examine the cache contents. */
e78aecca 2294void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
b7b65b15
EZ
2295void
2296bidi_dump_cached_states (void)
2297{
2298 int i;
2299 int ndigits = 1;
2300
2301 if (bidi_cache_idx == 0)
2302 {
2303 fprintf (stderr, "The cache is empty.\n");
2304 return;
2305 }
2306 fprintf (stderr, "Total of %d state%s in cache:\n",
2307 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2308
2309 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2310 ndigits++;
2311 fputs ("ch ", stderr);
2312 for (i = 0; i < bidi_cache_idx; i++)
2313 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2314 fputs ("\n", stderr);
2315 fputs ("lvl ", stderr);
2316 for (i = 0; i < bidi_cache_idx; i++)
2317 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2318 fputs ("\n", stderr);
2319 fputs ("pos ", stderr);
2320 for (i = 0; i < bidi_cache_idx; i++)
c2982e87 2321 fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
b7b65b15
EZ
2322 fputs ("\n", stderr);
2323}