Convert (most) functions in src to standard C.
[bpt/emacs.git] / src / bidi.c
CommitLineData
b7b65b15 1/* Low-level bidirectional buffer-scanning functions for GNU Emacs.
b118e65f
EZ
2 Copyright (C) 2000, 2001, 2004, 2005, 2009, 2010
3 Free Software Foundation, Inc.
b7b65b15
EZ
4
5This file is part of GNU Emacs.
6
a8d11bd3 7GNU Emacs is free software: you can redistribute it and/or modify
b7b65b15 8it under the terms of the GNU General Public License as published by
a8d11bd3
EZ
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
b7b65b15
EZ
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
b7b65b15 17You should have received a copy of the GNU General Public License
a8d11bd3 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
b7b65b15 19
2d6e4628
EZ
20/* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
b7b65b15
EZ
23 as per UAX#9, a part of the Unicode Standard.
24
25 Unlike the reference and most other implementations, this one is
940afb59
EZ
26 designed to be called once for every character in the buffer or
27 string.
b7b65b15 28
4b292a22 29 The main entry point is bidi_move_to_visually_next. Each time it
b7b65b15
EZ
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
4b292a22
EZ
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
b7b65b15
EZ
36 character by resolving their levels on the fly.
37
940afb59
EZ
38 The two other entry points are bidi_paragraph_init and
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
89d3374a
EZ
43 If you want to understand the code, you will have to read it
44 together with the relevant portions of UAX#9. The comments include
45 references to UAX#9 rules, for that very reason.
46
b7b65b15
EZ
47 A note about references to UAX#9 rules: if the reference says
48 something like "X9/Retaining", it means that you need to refer to
49 rule X9 and to its modifications decribed in the "Implementation
50 Notes" section of UAX#9, under "Retaining Format Codes". */
51
52#ifdef HAVE_CONFIG_H
53#include <config.h>
54#endif
55
56#include <stdio.h>
57
58#ifdef HAVE_STRING_H
59#include <string.h>
60#endif
61
29e3d8d1
EZ
62#include <setjmp.h>
63
b7b65b15
EZ
64#include "lisp.h"
65#include "buffer.h"
66#include "character.h"
67#include "dispextern.h"
68
69static int bidi_initialized = 0;
70
cbc4fd20 71static Lisp_Object bidi_type_table, bidi_mirror_table;
b7b65b15 72
e7402cb2 73/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
b7b65b15
EZ
74#define LRM_CHAR 0x200E
75#define RLM_CHAR 0x200F
76#define LRE_CHAR 0x202A
77#define RLE_CHAR 0x202B
78#define PDF_CHAR 0x202C
79#define LRO_CHAR 0x202D
80#define RLO_CHAR 0x202E
81
b7b65b15 82#define BIDI_EOB -1
e7402cb2 83#define BIDI_BOB -2 /* FIXME: Is this needed? */
b7b65b15 84
b7b65b15
EZ
85/* Local data structures. (Look in dispextern.h for the rest.) */
86
87/* What we need to know about the current paragraph. */
88struct bidi_paragraph_info {
89 int start_bytepos; /* byte position where it begins */
90 int end_bytepos; /* byte position where it ends */
91 int embedding_level; /* its basic embedding level */
92 bidi_dir_t base_dir; /* its base direction */
93};
94
95/* Data type for describing the bidirectional character categories. */
96typedef enum {
97 UNKNOWN_BC,
98 NEUTRAL,
99 WEAK,
100 STRONG
101} bidi_category_t;
102
103int bidi_ignore_explicit_marks_for_paragraph_level = 1;
104
372b7a95 105static Lisp_Object paragraph_start_re, paragraph_separate_re;
6bff6497 106static Lisp_Object Qparagraph_start, Qparagraph_separate;
b7b65b15 107
b7b65b15 108static void
971de7fb 109bidi_initialize (void)
b7b65b15 110{
317fbf33
EZ
111
112#include "biditype.h"
cbc4fd20 113#include "bidimirror.h"
317fbf33 114
b7b65b15
EZ
115 int i;
116
117 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
2d6e4628 118 staticpro (&bidi_type_table);
b7b65b15
EZ
119
120 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
317fbf33 121 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
b7b65b15 122 make_number (bidi_type[i].type));
6bff6497 123
cbc4fd20
EZ
124 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
125 staticpro (&bidi_mirror_table);
126
127 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
128 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
129 make_number (bidi_mirror[i].to));
130
b4bf28b7
SM
131 Qparagraph_start = intern ("paragraph-start");
132 staticpro (&Qparagraph_start);
372b7a95
EZ
133 paragraph_start_re = Fsymbol_value (Qparagraph_start);
134 if (!STRINGP (paragraph_start_re))
135 paragraph_start_re = build_string ("\f\\|[ \t]*$");
136 staticpro (&paragraph_start_re);
b4bf28b7
SM
137 Qparagraph_separate = intern ("paragraph-separate");
138 staticpro (&Qparagraph_separate);
372b7a95
EZ
139 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
140 if (!STRINGP (paragraph_separate_re))
141 paragraph_separate_re = build_string ("[ \t\f]*$");
142 staticpro (&paragraph_separate_re);
b7b65b15
EZ
143 bidi_initialized = 1;
144}
145
6bff6497
EZ
146/* Return the bidi type of a character CH, subject to the current
147 directional OVERRIDE. */
fd3998ff 148static INLINE bidi_type_t
6bff6497 149bidi_get_type (int ch, bidi_dir_t override)
b7b65b15 150{
6bff6497
EZ
151 bidi_type_t default_type;
152
e7402cb2
EZ
153 if (ch == BIDI_EOB)
154 return NEUTRAL_B;
e342a24d
EZ
155 if (ch < 0 || ch > MAX_CHAR)
156 abort ();
6bff6497
EZ
157
158 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
159
160 if (override == NEUTRAL_DIR)
161 return default_type;
162
163 switch (default_type)
164 {
165 /* Although UAX#9 does not tell, it doesn't make sense to
166 override NEUTRAL_B and LRM/RLM characters. */
167 case NEUTRAL_B:
168 case LRE:
169 case LRO:
170 case RLE:
171 case RLO:
172 case PDF:
173 return default_type;
174 default:
175 switch (ch)
176 {
177 case LRM_CHAR:
178 case RLM_CHAR:
179 return default_type;
180 default:
181 if (override == L2R) /* X6 */
182 return STRONG_L;
183 else if (override == R2L)
184 return STRONG_R;
185 else
186 abort (); /* can't happen: handled above */
187 }
188 }
b7b65b15
EZ
189}
190
2d6e4628
EZ
191void
192bidi_check_type (bidi_type_t type)
193{
194 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
195 abort ();
196}
197
b7b65b15 198/* Given a bidi TYPE of a character, return its category. */
fd3998ff 199static INLINE bidi_category_t
b7b65b15
EZ
200bidi_get_category (bidi_type_t type)
201{
202 switch (type)
203 {
204 case UNKNOWN_BT:
205 return UNKNOWN_BC;
206 case STRONG_L:
207 case STRONG_R:
208 case STRONG_AL:
209 case LRE:
210 case LRO:
211 case RLE:
212 case RLO:
213 return STRONG;
214 case PDF: /* ??? really?? */
215 case WEAK_EN:
216 case WEAK_ES:
217 case WEAK_ET:
218 case WEAK_AN:
219 case WEAK_CS:
220 case WEAK_NSM:
221 case WEAK_BN:
222 return WEAK;
223 case NEUTRAL_B:
224 case NEUTRAL_S:
225 case NEUTRAL_WS:
226 case NEUTRAL_ON:
227 return NEUTRAL;
228 default:
229 abort ();
230 }
231}
232
a6e8d97c
EZ
233/* Return the mirrored character of C, if it has one. If C has no
234 mirrored counterpart, return C.
235 Note: The conditions in UAX#9 clause L4 regarding the surrounding
236 context must be tested by the caller. */
b7b65b15
EZ
237int
238bidi_mirror_char (int c)
239{
cbc4fd20
EZ
240 Lisp_Object val;
241
242 if (c == BIDI_EOB)
243 return c;
244 if (c < 0 || c > MAX_CHAR)
245 abort ();
b7b65b15 246
cbc4fd20
EZ
247 val = CHAR_TABLE_REF (bidi_mirror_table, c);
248 if (INTEGERP (val))
b7b65b15 249 {
cbc4fd20 250 int v = XINT (val);
b7b65b15 251
cbc4fd20
EZ
252 if (v < 0 || v > MAX_CHAR)
253 abort ();
254
255 return v;
b7b65b15 256 }
cbc4fd20 257
b7b65b15
EZ
258 return c;
259}
260
261/* Copy the bidi iterator from FROM to TO. To save cycles, this only
262 copies the part of the level stack that is actually in use. */
fd3998ff 263static INLINE void
b7b65b15
EZ
264bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
265{
266 int i;
267
e69a9370
EZ
268 /* Copy everything except the level stack and beyond. */
269 memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0]));
b7b65b15
EZ
270
271 /* Copy the active part of the level stack. */
272 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
273 for (i = 1; i <= from->stack_idx; i++)
274 to->level_stack[i] = from->level_stack[i];
275}
276
277/* Caching the bidi iterator states. */
278
2fe72643
EZ
279#define BIDI_CACHE_CHUNK 200
280static struct bidi_it *bidi_cache;
281static size_t bidi_cache_size = 0;
0416466c 282static size_t elsz = sizeof (struct bidi_it);
2fe72643
EZ
283static int bidi_cache_idx; /* next unused cache slot */
284static int bidi_cache_last_idx; /* slot of last cache hit */
b7b65b15 285
fd3998ff 286static INLINE void
b7b65b15
EZ
287bidi_cache_reset (void)
288{
289 bidi_cache_idx = 0;
290 bidi_cache_last_idx = -1;
291}
292
2fe72643
EZ
293static INLINE void
294bidi_cache_shrink (void)
295{
296 if (bidi_cache_size > BIDI_CACHE_CHUNK)
297 {
0416466c
EZ
298 bidi_cache_size = BIDI_CACHE_CHUNK;
299 bidi_cache =
300 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643
EZ
301 }
302 bidi_cache_reset ();
303}
304
fd3998ff 305static INLINE void
b7b65b15
EZ
306bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
307{
308 int current_scan_dir = bidi_it->scan_dir;
309
310 if (idx < 0 || idx >= bidi_cache_idx)
311 abort ();
312
313 bidi_copy_it (bidi_it, &bidi_cache[idx]);
314 bidi_it->scan_dir = current_scan_dir;
315 bidi_cache_last_idx = idx;
316}
317
318/* Find a cached state with a given CHARPOS and resolved embedding
319 level less or equal to LEVEL. if LEVEL is -1, disregard the
320 resolved levels in cached states. DIR, if non-zero, means search
321 in that direction from the last cache hit. */
fd3998ff 322static INLINE int
b7b65b15
EZ
323bidi_cache_search (int charpos, int level, int dir)
324{
325 int i, i_start;
326
327 if (bidi_cache_idx)
328 {
329 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
330 dir = -1;
331 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
332 dir = 1;
333 if (dir)
334 i_start = bidi_cache_last_idx;
335 else
336 {
337 dir = -1;
338 i_start = bidi_cache_idx - 1;
339 }
340
341 if (dir < 0)
342 {
343 /* Linear search for now; FIXME! */
344 for (i = i_start; i >= 0; i--)
345 if (bidi_cache[i].charpos == charpos
346 && (level == -1 || bidi_cache[i].resolved_level <= level))
347 return i;
348 }
349 else
350 {
351 for (i = i_start; i < bidi_cache_idx; i++)
352 if (bidi_cache[i].charpos == charpos
353 && (level == -1 || bidi_cache[i].resolved_level <= level))
354 return i;
355 }
356 }
357
358 return -1;
359}
360
361/* Find a cached state where the resolved level changes to a value
362 that is lower than LEVEL, and return its cache slot index. DIR is
363 the direction to search, starting with the last used cache slot.
364 BEFORE, if non-zero, means return the index of the slot that is
365 ``before'' the level change in the search direction. That is,
366 given the cached levels like this:
367
368 1122333442211
369 AB C
370
371 and assuming we are at the position cached at the slot marked with
372 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
373 index of slot B or A, depending whether BEFORE is, respectively,
374 non-zero or zero. */
375static int
376bidi_cache_find_level_change (int level, int dir, int before)
377{
378 if (bidi_cache_idx)
379 {
380 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
381 int incr = before ? 1 : 0;
382
383 if (!dir)
384 dir = -1;
385 else if (!incr)
386 i += dir;
387
388 if (dir < 0)
389 {
390 while (i >= incr)
391 {
392 if (bidi_cache[i - incr].resolved_level >= 0
393 && bidi_cache[i - incr].resolved_level < level)
394 return i;
395 i--;
396 }
397 }
398 else
399 {
400 while (i < bidi_cache_idx - incr)
401 {
402 if (bidi_cache[i + incr].resolved_level >= 0
403 && bidi_cache[i + incr].resolved_level < level)
404 return i;
405 i++;
406 }
407 }
408 }
409
410 return -1;
411}
412
fd3998ff 413static INLINE void
b7b65b15
EZ
414bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
415{
416 int idx;
417
418 /* We should never cache on backward scans. */
419 if (bidi_it->scan_dir == -1)
420 abort ();
421 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
422
423 if (idx < 0)
424 {
425 idx = bidi_cache_idx;
2fe72643
EZ
426 /* Enlarge the cache as needed. */
427 if (idx >= bidi_cache_size)
428 {
0416466c 429 bidi_cache_size += BIDI_CACHE_CHUNK;
2fe72643 430 bidi_cache =
0416466c 431 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643 432 }
bd924a5d
EZ
433 /* Character positions should correspond to cache positions 1:1.
434 If we are outside the range of cached positions, the cache is
435 useless and must be reset. */
436 if (idx > 0 &&
437 (bidi_it->charpos > bidi_cache[idx - 1].charpos + 1
438 || bidi_it->charpos < bidi_cache[0].charpos))
439 {
440 bidi_cache_reset ();
441 idx = 0;
442 }
b7b65b15
EZ
443 bidi_copy_it (&bidi_cache[idx], bidi_it);
444 if (!resolved)
445 bidi_cache[idx].resolved_level = -1;
446 }
447 else
448 {
449 /* Copy only the members which could have changed, to avoid
450 costly copying of the entire struct. */
451 bidi_cache[idx].type = bidi_it->type;
2d6e4628 452 bidi_check_type (bidi_it->type);
89d3374a
EZ
453 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
454 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
455 if (resolved)
456 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
457 else
458 bidi_cache[idx].resolved_level = -1;
459 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
460 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
461 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
462 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
463 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
464 }
465
466 bidi_cache_last_idx = idx;
467 if (idx >= bidi_cache_idx)
468 bidi_cache_idx = idx + 1;
469}
470
fd3998ff 471static INLINE bidi_type_t
b7b65b15
EZ
472bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
473{
474 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
475
476 if (i >= 0)
477 {
478 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
479
5009f85e 480 bidi_copy_it (bidi_it, &bidi_cache[i]);
b7b65b15
EZ
481 bidi_cache_last_idx = i;
482 /* Don't let scan direction from from the cached state override
483 the current scan direction. */
484 bidi_it->scan_dir = current_scan_dir;
485 return bidi_it->type;
486 }
487
488 return UNKNOWN_BT;
489}
490
fd3998ff 491static INLINE int
b7b65b15
EZ
492bidi_peek_at_next_level (struct bidi_it *bidi_it)
493{
494 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
495 abort ();
496 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
497}
498
be39f003
EZ
499/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
500 Value is the non-negative length of the paragraph separator
501 following the buffer position, -1 if position is at the beginning
502 of a new paragraph, or -2 if position is neither at beginning nor
503 at end of a paragraph. */
fd3998ff 504static EMACS_INT
6bff6497 505bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
b7b65b15 506{
b4bf28b7 507 /* FIXME: Why Fbuffer_local_value rather than just Fsymbol_value? */
372b7a95
EZ
508 Lisp_Object sep_re;
509 Lisp_Object start_re;
be39f003
EZ
510 EMACS_INT val;
511
372b7a95
EZ
512 sep_re = paragraph_separate_re;
513 start_re = paragraph_start_re;
be39f003
EZ
514
515 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
516 if (val < 0)
517 {
518 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
519 val = -1;
520 else
521 val = -2;
522 }
b7b65b15 523
be39f003 524 return val;
b7b65b15
EZ
525}
526
527/* Determine the start-of-run (sor) directional type given the two
528 embedding levels on either side of the run boundary. Also, update
529 the saved info about previously seen characters, since that info is
530 generally valid for a single level run. */
fd3998ff 531static INLINE void
b7b65b15
EZ
532bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
533{
534 int higher_level = level_before > level_after ? level_before : level_after;
535
536 /* The prev_was_pdf gork is required for when we have several PDFs
537 in a row. In that case, we want to compute the sor type for the
538 next level run only once: when we see the first PDF. That's
539 because the sor type depends only on the higher of the two levels
540 that we find on the two sides of the level boundary (see UAX#9,
541 clause X10), and so we don't need to know the final embedding
542 level to which we descend after processing all the PDFs. */
e342a24d 543 if (!bidi_it->prev_was_pdf || level_before < level_after)
b7b65b15
EZ
544 /* FIXME: should the default sor direction be user selectable? */
545 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
546 if (level_before > level_after)
547 bidi_it->prev_was_pdf = 1;
548
549 bidi_it->prev.type = UNKNOWN_BT;
89d3374a
EZ
550 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
551 bidi_it->last_strong.orig_type = UNKNOWN_BT;
b7b65b15
EZ
552 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
553 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
554 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
89d3374a
EZ
555 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
556 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15
EZ
557 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
558}
559
6bff6497 560static void
be39f003
EZ
561bidi_line_init (struct bidi_it *bidi_it)
562{
563 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
564 bidi_it->resolved_level = bidi_it->level_stack[0].level;
565 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
566 bidi_it->invalid_levels = 0;
567 bidi_it->invalid_rl_levels = -1;
568 bidi_it->next_en_pos = -1;
569 bidi_it->next_for_ws.type = UNKNOWN_BT;
b44d9321
EZ
570 bidi_set_sor_type (bidi_it,
571 bidi_it->paragraph_dir == R2L ? 1 : 0,
be39f003
EZ
572 bidi_it->level_stack[0].level); /* X10 */
573
574 bidi_cache_reset ();
575}
576
577/* Find the beginning of this paragraph by looking back in the buffer.
578 Value is the byte position of the paragraph's beginning. */
579static EMACS_INT
b44d9321 580bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
6bff6497 581{
372b7a95 582 Lisp_Object re = paragraph_start_re;
6bff6497
EZ
583 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
584
6bff6497
EZ
585 while (pos_byte > BEGV_BYTE
586 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
587 {
be39f003
EZ
588 pos = find_next_newline_no_quit (pos - 1, -1);
589 pos_byte = CHAR_TO_BYTE (pos);
6bff6497 590 }
be39f003 591 return pos_byte;
6bff6497
EZ
592}
593
be39f003 594/* Determine the direction, a.k.a. base embedding level, of the
b44d9321
EZ
595 paragraph we are about to iterate through. If DIR is either L2R or
596 R2L, just use that. Otherwise, determine the paragraph direction
597 from the first strong character of the paragraph.
598
599 Note that this gives the paragraph separator the same direction as
600 the preceding paragraph, even though Emacs generally views the
601 separartor as not belonging to any paragraph. */
b7b65b15
EZ
602void
603bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
604{
6bff6497 605 EMACS_INT bytepos = bidi_it->bytepos;
e7402cb2 606
5e65aec0
EZ
607 /* Special case for an empty buffer. */
608 if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE)
609 dir = L2R;
9c82e145 610 /* We should never be called at EOB or before BEGV. */
5e65aec0 611 else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
9c82e145
EZ
612 abort ();
613
be39f003
EZ
614 if (dir == L2R)
615 {
616 bidi_it->paragraph_dir = L2R;
617 bidi_it->new_paragraph = 0;
618 }
619 else if (dir == R2L)
620 {
621 bidi_it->paragraph_dir = R2L;
622 bidi_it->new_paragraph = 0;
623 }
b7b65b15
EZ
624 else if (dir == NEUTRAL_DIR) /* P2 */
625 {
6bff6497
EZ
626 int ch, ch_len;
627 EMACS_INT pos;
628 bidi_type_t type;
be39f003 629
d20e1419
EZ
630 if (!bidi_initialized)
631 bidi_initialize ();
632
be39f003
EZ
633 /* If we are inside a paragraph separator, we are just waiting
634 for the separator to be exhausted; use the previous paragraph
e5a2fec7
EZ
635 direction. But don't do that if we have been just reseated,
636 because we need to reinitialize below in that case. */
637 if (!bidi_it->first_elt
638 && bidi_it->charpos < bidi_it->separator_limit)
be39f003
EZ
639 return;
640
b44d9321 641 /* If we are on a newline, get past it to where the next
5e65aec0
EZ
642 paragraph might start. But don't do that at BEGV since then
643 we are potentially in a new paragraph that doesn't yet
644 exist. */
c143c213 645 pos = bidi_it->charpos;
5e65aec0 646 if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n')
be39f003 647 {
b44d9321 648 bytepos++;
c143c213 649 pos++;
be39f003 650 }
b44d9321
EZ
651
652 /* We are either at the beginning of a paragraph or in the
653 middle of it. Find where this paragraph starts. */
654 bytepos = bidi_find_paragraph_start (pos, bytepos);
6bff6497 655
be39f003
EZ
656 bidi_it->separator_limit = -1;
657 bidi_it->new_paragraph = 0;
6bff6497
EZ
658 ch = FETCH_CHAR (bytepos);
659 ch_len = CHAR_BYTES (ch);
be39f003 660 pos = BYTE_TO_CHAR (bytepos);
6bff6497 661 type = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15 662
e342a24d 663 for (pos++, bytepos += ch_len;
b7b65b15
EZ
664 /* NOTE: UAX#9 says to search only for L, AL, or R types of
665 characters, and ignore RLE, RLO, LRE, and LRO. However,
666 I'm not sure it makes sense to omit those 4; should try
667 with and without that to see the effect. */
668 (bidi_get_category (type) != STRONG)
669 || (bidi_ignore_explicit_marks_for_paragraph_level
670 && (type == RLE || type == RLO
671 || type == LRE || type == LRO));
6bff6497 672 type = bidi_get_type (ch, NEUTRAL_DIR))
b7b65b15 673 {
be39f003 674 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
b7b65b15 675 break;
21fce5ab
EZ
676 if (bytepos >= ZV_BYTE)
677 {
678 /* Pretend there's a paragraph separator at end of buffer. */
679 type = NEUTRAL_B;
680 break;
681 }
b7b65b15
EZ
682 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
683 }
684 if (type == STRONG_R || type == STRONG_AL) /* P3 */
be39f003
EZ
685 bidi_it->paragraph_dir = R2L;
686 else if (type == STRONG_L)
687 bidi_it->paragraph_dir = L2R;
b7b65b15 688 }
be39f003
EZ
689 else
690 abort ();
b7b65b15 691
b44d9321
EZ
692 /* Contrary to UAX#9 clause P3, we only default the paragraph
693 direction to L2R if we have no previous usable paragraph
694 direction. */
d20e1419 695 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
b44d9321 696 bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
be39f003 697 if (bidi_it->paragraph_dir == R2L)
b44d9321 698 bidi_it->level_stack[0].level = 1;
be39f003 699 else
b44d9321 700 bidi_it->level_stack[0].level = 0;
be39f003
EZ
701
702 bidi_line_init (bidi_it);
b7b65b15
EZ
703}
704
6bff6497
EZ
705/* Do whatever UAX#9 clause X8 says should be done at paragraph's
706 end. */
fd3998ff 707static INLINE void
b7b65b15
EZ
708bidi_set_paragraph_end (struct bidi_it *bidi_it)
709{
710 bidi_it->invalid_levels = 0;
711 bidi_it->invalid_rl_levels = -1;
712 bidi_it->stack_idx = 0;
713 bidi_it->resolved_level = bidi_it->level_stack[0].level;
b7b65b15
EZ
714}
715
89d3374a 716/* Initialize the bidi iterator from buffer position CHARPOS. */
b7b65b15 717void
6bff6497 718bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it)
b7b65b15
EZ
719{
720 if (! bidi_initialized)
721 bidi_initialize ();
89d3374a
EZ
722 bidi_it->charpos = charpos;
723 bidi_it->bytepos = bytepos;
6bff6497
EZ
724 bidi_it->first_elt = 1;
725 bidi_set_paragraph_end (bidi_it);
726 bidi_it->new_paragraph = 1;
be39f003 727 bidi_it->separator_limit = -1;
b7b65b15 728 bidi_it->type = NEUTRAL_B;
ebb5722e
EZ
729 bidi_it->type_after_w1 = NEUTRAL_B;
730 bidi_it->orig_type = NEUTRAL_B;
b7b65b15 731 bidi_it->prev_was_pdf = 0;
ebb5722e
EZ
732 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
733 bidi_it->prev.orig_type = UNKNOWN_BT;
89d3374a
EZ
734 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
735 bidi_it->last_strong.orig_type = UNKNOWN_BT;
b7b65b15
EZ
736 bidi_it->next_for_neutral.charpos = -1;
737 bidi_it->next_for_neutral.type =
89d3374a
EZ
738 bidi_it->next_for_neutral.type_after_w1 =
739 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15
EZ
740 bidi_it->prev_for_neutral.charpos = -1;
741 bidi_it->prev_for_neutral.type =
89d3374a
EZ
742 bidi_it->prev_for_neutral.type_after_w1 =
743 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
b7b65b15 744 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
2fe72643 745 bidi_cache_shrink ();
b7b65b15
EZ
746}
747
748/* Push the current embedding level and override status; reset the
749 current level to LEVEL and the current override status to OVERRIDE. */
fd3998ff 750static INLINE void
b7b65b15
EZ
751bidi_push_embedding_level (struct bidi_it *bidi_it,
752 int level, bidi_dir_t override)
753{
754 bidi_it->stack_idx++;
755 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
756 abort ();
757 bidi_it->level_stack[bidi_it->stack_idx].level = level;
758 bidi_it->level_stack[bidi_it->stack_idx].override = override;
759}
760
761/* Pop the embedding level and directional override status from the
762 stack, and return the new level. */
fd3998ff 763static INLINE int
b7b65b15
EZ
764bidi_pop_embedding_level (struct bidi_it *bidi_it)
765{
766 /* UAX#9 says to ignore invalid PDFs. */
767 if (bidi_it->stack_idx > 0)
768 bidi_it->stack_idx--;
769 return bidi_it->level_stack[bidi_it->stack_idx].level;
770}
771
772/* Record in SAVED_INFO the information about the current character. */
fd3998ff 773static INLINE void
b7b65b15
EZ
774bidi_remember_char (struct bidi_saved_info *saved_info,
775 struct bidi_it *bidi_it)
776{
777 saved_info->charpos = bidi_it->charpos;
778 saved_info->bytepos = bidi_it->bytepos;
779 saved_info->type = bidi_it->type;
2d6e4628 780 bidi_check_type (bidi_it->type);
89d3374a
EZ
781 saved_info->type_after_w1 = bidi_it->type_after_w1;
782 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 783 saved_info->orig_type = bidi_it->orig_type;
2d6e4628 784 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
785}
786
787/* Resolve the type of a neutral character according to the type of
788 surrounding strong text and the current embedding level. */
fd3998ff 789static INLINE bidi_type_t
b7b65b15
EZ
790bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
791{
792 /* N1: European and Arabic numbers are treated as though they were R. */
793 if (next_type == WEAK_EN || next_type == WEAK_AN)
794 next_type = STRONG_R;
795 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
796 prev_type = STRONG_R;
797
798 if (next_type == prev_type) /* N1 */
799 return next_type;
800 else if ((lev & 1) == 0) /* N2 */
801 return STRONG_L;
802 else
803 return STRONG_R;
804}
805
fd3998ff 806static INLINE int
b7b65b15
EZ
807bidi_explicit_dir_char (int c)
808{
809 /* FIXME: this should be replaced with a lookup table with suitable
810 bits set, like standard C ctype macros do. */
811 return (c == LRE_CHAR || c == LRO_CHAR
812 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
813}
814
815/* A helper function for bidi_resolve_explicit. It advances to the
816 next character in logical order and determines the new embedding
817 level and directional override, but does not take into account
818 empty embeddings. */
819static int
820bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
821{
822 int curchar;
823 bidi_type_t type;
824 int current_level;
825 int new_level;
826 bidi_dir_t override;
827
9c82e145
EZ
828 if (bidi_it->bytepos < BEGV_BYTE /* after reseat to BEGV? */
829 || bidi_it->first_elt)
e7402cb2 830 {
9c82e145
EZ
831 bidi_it->first_elt = 0;
832 if (bidi_it->charpos < BEGV)
833 bidi_it->charpos = BEGV;
834 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
e7402cb2 835 }
9c82e145 836 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
b7b65b15
EZ
837 {
838 bidi_it->charpos++;
e7402cb2
EZ
839 if (bidi_it->ch_len == 0)
840 abort ();
b7b65b15
EZ
841 bidi_it->bytepos += bidi_it->ch_len;
842 }
843
844 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
845 override = bidi_it->level_stack[bidi_it->stack_idx].override;
846 new_level = current_level;
847
848 /* in case it is a unibyte character (not yet implemented) */
849 /* _fetch_multibyte_char_len = 1; */
e7402cb2
EZ
850 if (bidi_it->bytepos >= ZV_BYTE)
851 {
852 curchar = BIDI_EOB;
853 bidi_it->ch_len = 1;
854 }
855 else
856 {
857 curchar = FETCH_CHAR (bidi_it->bytepos);
858 bidi_it->ch_len = CHAR_BYTES (curchar);
859 }
b7b65b15 860 bidi_it->ch = curchar;
b7b65b15 861
6bff6497
EZ
862 /* Don't apply directional override here, as all the types we handle
863 below will not be affected by the override anyway, and we need
864 the original type unaltered. The override will be applied in
865 bidi_resolve_weak. */
866 type = bidi_get_type (curchar, NEUTRAL_DIR);
89d3374a
EZ
867 bidi_it->orig_type = type;
868 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
869
870 if (type != PDF)
871 bidi_it->prev_was_pdf = 0;
872
89d3374a 873 bidi_it->type_after_w1 = UNKNOWN_BT;
b7b65b15
EZ
874
875 switch (type)
876 {
877 case RLE: /* X2 */
878 case RLO: /* X4 */
89d3374a
EZ
879 bidi_it->type_after_w1 = type;
880 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
881 type = WEAK_BN; /* X9/Retaining */
882 if (bidi_it->ignore_bn_limit <= 0)
883 {
884 if (current_level <= BIDI_MAXLEVEL - 4)
885 {
886 /* Compute the least odd embedding level greater than
887 the current level. */
888 new_level = ((current_level + 1) & ~1) + 1;
89d3374a 889 if (bidi_it->type_after_w1 == RLE)
b7b65b15
EZ
890 override = NEUTRAL_DIR;
891 else
892 override = R2L;
893 if (current_level == BIDI_MAXLEVEL - 4)
894 bidi_it->invalid_rl_levels = 0;
895 bidi_push_embedding_level (bidi_it, new_level, override);
896 }
897 else
898 {
899 bidi_it->invalid_levels++;
900 /* See the commentary about invalid_rl_levels below. */
901 if (bidi_it->invalid_rl_levels < 0)
902 bidi_it->invalid_rl_levels = 0;
903 bidi_it->invalid_rl_levels++;
904 }
905 }
89d3374a 906 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
907 || bidi_it->next_en_pos > bidi_it->charpos)
908 type = WEAK_EN;
909 break;
910 case LRE: /* X3 */
911 case LRO: /* X5 */
89d3374a
EZ
912 bidi_it->type_after_w1 = type;
913 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
914 type = WEAK_BN; /* X9/Retaining */
915 if (bidi_it->ignore_bn_limit <= 0)
916 {
917 if (current_level <= BIDI_MAXLEVEL - 5)
918 {
919 /* Compute the least even embedding level greater than
920 the current level. */
921 new_level = ((current_level + 2) & ~1);
89d3374a 922 if (bidi_it->type_after_w1 == LRE)
b7b65b15
EZ
923 override = NEUTRAL_DIR;
924 else
925 override = L2R;
926 bidi_push_embedding_level (bidi_it, new_level, override);
927 }
928 else
929 {
930 bidi_it->invalid_levels++;
931 /* invalid_rl_levels counts invalid levels encountered
932 while the embedding level was already too high for
933 LRE/LRO, but not for RLE/RLO. That is because
934 there may be exactly one PDF which we should not
935 ignore even though invalid_levels is non-zero.
936 invalid_rl_levels helps to know what PDF is
937 that. */
938 if (bidi_it->invalid_rl_levels >= 0)
939 bidi_it->invalid_rl_levels++;
940 }
941 }
89d3374a 942 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
943 || bidi_it->next_en_pos > bidi_it->charpos)
944 type = WEAK_EN;
945 break;
946 case PDF: /* X7 */
89d3374a
EZ
947 bidi_it->type_after_w1 = type;
948 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
949 type = WEAK_BN; /* X9/Retaining */
950 if (bidi_it->ignore_bn_limit <= 0)
951 {
952 if (!bidi_it->invalid_rl_levels)
953 {
954 new_level = bidi_pop_embedding_level (bidi_it);
955 bidi_it->invalid_rl_levels = -1;
956 if (bidi_it->invalid_levels)
957 bidi_it->invalid_levels--;
958 /* else nothing: UAX#9 says to ignore invalid PDFs */
959 }
960 if (!bidi_it->invalid_levels)
961 new_level = bidi_pop_embedding_level (bidi_it);
962 else
963 {
964 bidi_it->invalid_levels--;
965 bidi_it->invalid_rl_levels--;
966 }
967 }
89d3374a 968 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
969 || bidi_it->next_en_pos > bidi_it->charpos)
970 type = WEAK_EN;
971 break;
972 default:
973 /* Nothing. */
974 break;
975 }
976
977 bidi_it->type = type;
2d6e4628 978 bidi_check_type (bidi_it->type);
b7b65b15
EZ
979
980 return new_level;
981}
982
983/* Given an iterator state in BIDI_IT, advance one character position
984 in the buffer to the next character (in the logical order), resolve
985 any explicit embeddings and directional overrides, and return the
986 embedding level of the character after resolving explicit
987 directives and ignoring empty embeddings. */
988static int
989bidi_resolve_explicit (struct bidi_it *bidi_it)
990{
991 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
992 int new_level = bidi_resolve_explicit_1 (bidi_it);
993
994 if (prev_level < new_level
995 && bidi_it->type == WEAK_BN
996 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1502b819 997 && bidi_it->bytepos < ZV_BYTE /* not already at EOB */
b7b65b15
EZ
998 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
999 + bidi_it->ch_len)))
1000 {
1001 /* Avoid pushing and popping embedding levels if the level run
1002 is empty, as this breaks level runs where it shouldn't.
1003 UAX#9 removes all the explicit embedding and override codes,
1004 so empty embeddings disappear without a trace. We need to
1005 behave as if we did the same. */
1006 struct bidi_it saved_it;
1007 int level = prev_level;
1008
1009 bidi_copy_it (&saved_it, bidi_it);
1010
1011 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1012 + bidi_it->ch_len)))
1013 {
1014 level = bidi_resolve_explicit_1 (bidi_it);
1015 }
1016
1017 if (level == prev_level) /* empty embedding */
1018 saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1019 else /* this embedding is non-empty */
1020 saved_it.ignore_bn_limit = -1;
1021
1022 bidi_copy_it (bidi_it, &saved_it);
1023 if (bidi_it->ignore_bn_limit > 0)
1024 {
1025 /* We pushed a level, but we shouldn't have. Undo that. */
1026 if (!bidi_it->invalid_rl_levels)
1027 {
1028 new_level = bidi_pop_embedding_level (bidi_it);
1029 bidi_it->invalid_rl_levels = -1;
1030 if (bidi_it->invalid_levels)
1031 bidi_it->invalid_levels--;
1032 }
1033 if (!bidi_it->invalid_levels)
1034 new_level = bidi_pop_embedding_level (bidi_it);
1035 else
1036 {
1037 bidi_it->invalid_levels--;
1038 bidi_it->invalid_rl_levels--;
1039 }
1040 }
1041 }
1042
b7b65b15
EZ
1043 if (bidi_it->type == NEUTRAL_B) /* X8 */
1044 {
21fce5ab 1045 bidi_set_paragraph_end (bidi_it);
6bff6497
EZ
1046 /* This is needed by bidi_resolve_weak below, and in L1. */
1047 bidi_it->type_after_w1 = bidi_it->type;
89d3374a 1048 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1049 }
1050
1051 return new_level;
1052}
1053
1054/* Advance in the buffer, resolve weak types and return the type of
1055 the next character after weak type resolution. */
fd3998ff 1056static bidi_type_t
b7b65b15
EZ
1057bidi_resolve_weak (struct bidi_it *bidi_it)
1058{
1059 bidi_type_t type;
1060 bidi_dir_t override;
1061 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1062 int new_level = bidi_resolve_explicit (bidi_it);
1063 int next_char;
1064 bidi_type_t type_of_next;
1065 struct bidi_it saved_it;
1066
1067 type = bidi_it->type;
1068 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1069
1070 if (type == UNKNOWN_BT
1071 || type == LRE
1072 || type == LRO
1073 || type == RLE
1074 || type == RLO
1075 || type == PDF)
1076 abort ();
1077
1078 if (new_level != prev_level
1079 || bidi_it->type == NEUTRAL_B)
1080 {
1081 /* We've got a new embedding level run, compute the directional
1082 type of sor and initialize per-run variables (UAX#9, clause
1083 X10). */
1084 bidi_set_sor_type (bidi_it, prev_level, new_level);
1085 }
1086 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1087 || type == WEAK_BN || type == STRONG_AL)
89d3374a
EZ
1088 bidi_it->type_after_w1 = type; /* needed in L1 */
1089 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1090
1091 /* Level and directional override status are already recorded in
1092 bidi_it, and do not need any change; see X6. */
1093 if (override == R2L) /* X6 */
1094 type = STRONG_R;
1095 else if (override == L2R)
1096 type = STRONG_L;
bc5a45f3 1097 else
b7b65b15 1098 {
bc5a45f3 1099 if (type == WEAK_NSM) /* W1 */
b7b65b15 1100 {
bc5a45f3 1101 /* Note that we don't need to consider the case where the
5930fe97
EZ
1102 prev character has its type overridden by an RLO or LRO,
1103 because then either the type of this NSM would have been
1104 also overridden, or the previous character is outside the
1105 current level run, and thus not relevant to this NSM.
1106 This is why NSM gets the type_after_w1 of the previous
1107 character. */
ebb5722e
EZ
1108 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1109 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1110 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
5930fe97 1111 type = bidi_it->prev.type_after_w1;
bc5a45f3
EZ
1112 else if (bidi_it->sor == R2L)
1113 type = STRONG_R;
1114 else if (bidi_it->sor == L2R)
1115 type = STRONG_L;
1116 else /* shouldn't happen! */
1117 abort ();
b7b65b15 1118 }
bc5a45f3
EZ
1119 if (type == WEAK_EN /* W2 */
1120 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1121 type = WEAK_AN;
1122 else if (type == STRONG_AL) /* W3 */
1123 type = STRONG_R;
1124 else if ((type == WEAK_ES /* W4 */
1125 && bidi_it->prev.type_after_w1 == WEAK_EN
1126 && bidi_it->prev.orig_type == WEAK_EN)
1127 || (type == WEAK_CS
1128 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1129 && bidi_it->prev.orig_type == WEAK_EN)
1130 || bidi_it->prev.type_after_w1 == WEAK_AN)))
b7b65b15 1131 {
e7402cb2
EZ
1132 next_char =
1133 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1134 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
6bff6497 1135 type_of_next = bidi_get_type (next_char, override);
b7b65b15 1136
bc5a45f3 1137 if (type_of_next == WEAK_BN
b7b65b15
EZ
1138 || bidi_explicit_dir_char (next_char))
1139 {
1140 bidi_copy_it (&saved_it, bidi_it);
1141 while (bidi_resolve_explicit (bidi_it) == new_level
bc5a45f3 1142 && bidi_it->type == WEAK_BN)
b7b65b15
EZ
1143 ;
1144 type_of_next = bidi_it->type;
b7b65b15
EZ
1145 bidi_copy_it (bidi_it, &saved_it);
1146 }
bc5a45f3
EZ
1147
1148 /* If the next character is EN, but the last strong-type
1149 character is AL, that next EN will be changed to AN when
1150 we process it in W2 above. So in that case, this ES
1151 should not be changed into EN. */
1152 if (type == WEAK_ES
1153 && type_of_next == WEAK_EN
1154 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1155 type = WEAK_EN;
1156 else if (type == WEAK_CS)
b7b65b15 1157 {
bc5a45f3
EZ
1158 if (bidi_it->prev.type_after_w1 == WEAK_AN
1159 && (type_of_next == WEAK_AN
1160 /* If the next character is EN, but the last
1161 strong-type character is AL, EN will be later
1162 changed to AN when we process it in W2 above.
1163 So in that case, this ES should not be
1164 changed into EN. */
1165 || (type_of_next == WEAK_EN
1166 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1167 type = WEAK_AN;
1168 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1169 && type_of_next == WEAK_EN
1170 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1171 type = WEAK_EN;
1172 }
1173 }
1174 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1175 || type == WEAK_BN) /* W5/Retaining */
1176 {
1177 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1178 || bidi_it->next_en_pos > bidi_it->charpos)
1179 type = WEAK_EN;
1180 else /* W5: ET/BN with EN after it. */
1181 {
1182 EMACS_INT en_pos = bidi_it->charpos + 1;
1183
1184 next_char =
1185 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1186 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1187 type_of_next = bidi_get_type (next_char, override);
1188
1189 if (type_of_next == WEAK_ET
1190 || type_of_next == WEAK_BN
1191 || bidi_explicit_dir_char (next_char))
1192 {
1193 bidi_copy_it (&saved_it, bidi_it);
1194 while (bidi_resolve_explicit (bidi_it) == new_level
1195 && (bidi_it->type == WEAK_BN
1196 || bidi_it->type == WEAK_ET))
1197 ;
1198 type_of_next = bidi_it->type;
1199 en_pos = bidi_it->charpos;
1200 bidi_copy_it (bidi_it, &saved_it);
1201 }
1202 if (type_of_next == WEAK_EN)
b7b65b15 1203 {
bc5a45f3
EZ
1204 /* If the last strong character is AL, the EN we've
1205 found will become AN when we get to it (W2). */
1206 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1207 {
1208 type = WEAK_EN;
1209 /* Remember this EN position, to speed up processing
1210 of the next ETs. */
1211 bidi_it->next_en_pos = en_pos;
1212 }
1213 else if (type == WEAK_BN)
1214 type = NEUTRAL_ON; /* W6/Retaining */
b7b65b15 1215 }
b7b65b15
EZ
1216 }
1217 }
1218 }
1219
1220 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
89d3374a
EZ
1221 || (type == WEAK_BN
1222 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1223 || bidi_it->prev.type_after_w1 == WEAK_ES
1224 || bidi_it->prev.type_after_w1 == WEAK_ET)))
b7b65b15
EZ
1225 type = NEUTRAL_ON;
1226
1227 /* Store the type we've got so far, before we clobber it with strong
1228 types in W7 and while resolving neutral types. But leave alone
1229 the original types that were recorded above, because we will need
1230 them for the L1 clause. */
89d3374a
EZ
1231 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1232 bidi_it->type_after_w1 = type;
1233 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1234
1235 if (type == WEAK_EN) /* W7 */
1236 {
89d3374a 1237 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
b7b65b15
EZ
1238 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1239 type = STRONG_L;
1240 }
1241
1242 bidi_it->type = type;
2d6e4628 1243 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1244 return type;
1245}
1246
fd3998ff 1247static bidi_type_t
b7b65b15
EZ
1248bidi_resolve_neutral (struct bidi_it *bidi_it)
1249{
1250 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1251 bidi_type_t type = bidi_resolve_weak (bidi_it);
1252 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1253
1254 if (!(type == STRONG_R
1255 || type == STRONG_L
1256 || type == WEAK_BN
1257 || type == WEAK_EN
1258 || type == WEAK_AN
1259 || type == NEUTRAL_B
1260 || type == NEUTRAL_S
1261 || type == NEUTRAL_WS
1262 || type == NEUTRAL_ON))
1263 abort ();
1264
1265 if (bidi_get_category (type) == NEUTRAL
1266 || (type == WEAK_BN && prev_level == current_level))
1267 {
1268 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1269 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1270 bidi_it->next_for_neutral.type,
1271 current_level);
1272 else
1273 {
1274 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1275 the assumption of batch-style processing; see clauses W4,
1276 W5, and especially N1, which require to look far forward
1277 (as well as back) in the buffer. May the fleas of a
1278 thousand camels infest the armpits of those who design
1279 supposedly general-purpose algorithms by looking at their
1280 own implementations, and fail to consider other possible
1281 implementations! */
1282 struct bidi_it saved_it;
1283 bidi_type_t next_type;
1284
1285 if (bidi_it->scan_dir == -1)
1286 abort ();
1287
1288 bidi_copy_it (&saved_it, bidi_it);
1289 /* Scan the text forward until we find the first non-neutral
1290 character, and then use that to resolve the neutral we
1291 are dealing with now. We also cache the scanned iterator
1292 states, to salvage some of the effort later. */
1293 bidi_cache_iterator_state (bidi_it, 0);
1294 do {
1295 /* Record the info about the previous character, so that
1296 it will be cached below with this state. */
89d3374a 1297 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1298 && bidi_it->type != WEAK_BN)
1299 bidi_remember_char (&bidi_it->prev, bidi_it);
1300 type = bidi_resolve_weak (bidi_it);
1301 /* Paragraph separators have their levels fully resolved
1302 at this point, so cache them as resolved. */
1303 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1304 /* FIXME: implement L1 here, by testing for a newline and
1305 resetting the level for any sequence of whitespace
1306 characters adjacent to it. */
1307 } while (!(type == NEUTRAL_B
1308 || (type != WEAK_BN
1309 && bidi_get_category (type) != NEUTRAL)
1310 /* This is all per level run, so stop when we
1311 reach the end of this level run. */
1312 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1313 current_level));
1314
1315 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1316
1317 switch (type)
1318 {
1319 case STRONG_L:
1320 case STRONG_R:
1321 case STRONG_AL:
1322 next_type = type;
1323 break;
1324 case WEAK_EN:
1325 case WEAK_AN:
1326 /* N1: ``European and Arabic numbers are treated as
1327 though they were R.'' */
1328 next_type = STRONG_R;
1329 saved_it.next_for_neutral.type = STRONG_R;
1330 break;
1331 case WEAK_BN:
1332 if (!bidi_explicit_dir_char (bidi_it->ch))
1333 abort (); /* can't happen: BNs are skipped */
1334 /* FALLTHROUGH */
1335 case NEUTRAL_B:
1336 /* Marched all the way to the end of this level run.
1337 We need to use the eor type, whose information is
1338 stored by bidi_set_sor_type in the prev_for_neutral
1339 member. */
1340 if (saved_it.type != WEAK_BN
89d3374a 1341 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
b7b65b15
EZ
1342 {
1343 next_type = bidi_it->prev_for_neutral.type;
1344 saved_it.next_for_neutral.type = next_type;
2d6e4628 1345 bidi_check_type (next_type);
b7b65b15
EZ
1346 }
1347 else
1348 {
1349 /* This is a BN which does not adjoin neutrals.
1350 Leave its type alone. */
1351 bidi_copy_it (bidi_it, &saved_it);
1352 return bidi_it->type;
1353 }
1354 break;
1355 default:
1356 abort ();
1357 }
1358 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1359 next_type, current_level);
1360 saved_it.type = type;
2d6e4628 1361 bidi_check_type (type);
b7b65b15
EZ
1362 bidi_copy_it (bidi_it, &saved_it);
1363 }
1364 }
1365 return type;
1366}
1367
1368/* Given an iterator state in BIDI_IT, advance one character position
1369 in the buffer to the next character (in the logical order), resolve
1370 the bidi type of that next character, and return that type. */
fd3998ff 1371static bidi_type_t
b7b65b15
EZ
1372bidi_type_of_next_char (struct bidi_it *bidi_it)
1373{
1374 bidi_type_t type;
1375
1376 /* This should always be called during a forward scan. */
1377 if (bidi_it->scan_dir != 1)
1378 abort ();
1379
1380 /* Reset the limit until which to ignore BNs if we step out of the
1381 area where we found only empty levels. */
1382 if ((bidi_it->ignore_bn_limit > 0
1383 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1384 || (bidi_it->ignore_bn_limit == -1
1385 && !bidi_explicit_dir_char (bidi_it->ch)))
1386 bidi_it->ignore_bn_limit = 0;
1387
1388 type = bidi_resolve_neutral (bidi_it);
1389
1390 return type;
1391}
1392
1393/* Given an iterator state BIDI_IT, advance one character position in
1394 the buffer to the next character (in the logical order), resolve
1395 the embedding and implicit levels of that next character, and
1396 return the resulting level. */
fd3998ff 1397static int
b7b65b15
EZ
1398bidi_level_of_next_char (struct bidi_it *bidi_it)
1399{
1400 bidi_type_t type;
1401 int level, prev_level = -1;
1402 struct bidi_saved_info next_for_neutral;
1403
1404 if (bidi_it->scan_dir == 1)
1405 {
1406 /* There's no sense in trying to advance if we hit end of text. */
1502b819 1407 if (bidi_it->bytepos >= ZV_BYTE)
b7b65b15
EZ
1408 return bidi_it->resolved_level;
1409
1410 /* Record the info about the previous character. */
89d3374a 1411 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1412 && bidi_it->type != WEAK_BN)
1413 bidi_remember_char (&bidi_it->prev, bidi_it);
89d3374a
EZ
1414 if (bidi_it->type_after_w1 == STRONG_R
1415 || bidi_it->type_after_w1 == STRONG_L
1416 || bidi_it->type_after_w1 == STRONG_AL)
b7b65b15
EZ
1417 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1418 /* FIXME: it sounds like we don't need both prev and
1419 prev_for_neutral members, but I'm leaving them both for now. */
1420 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1421 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1422 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1423
1424 /* If we overstepped the characters used for resolving neutrals
1425 and whitespace, invalidate their info in the iterator. */
1426 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1427 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1428 if (bidi_it->next_en_pos >= 0
1429 && bidi_it->charpos >= bidi_it->next_en_pos)
1430 bidi_it->next_en_pos = -1;
1431 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1432 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1433 bidi_it->next_for_ws.type = UNKNOWN_BT;
1434
1435 /* This must be taken before we fill the iterator with the info
1436 about the next char. If we scan backwards, the iterator
1437 state must be already cached, so there's no need to know the
1438 embedding level of the previous character, since we will be
1439 returning to our caller shortly. */
1440 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1441 }
1442 next_for_neutral = bidi_it->next_for_neutral;
1443
1444 /* Perhaps it is already cached. */
1445 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1446 if (type != UNKNOWN_BT)
1447 {
1448 /* Don't lose the information for resolving neutrals! The
1449 cached states could have been cached before their
1450 next_for_neutral member was computed. If we are on our way
1451 forward, we can simply take the info from the previous
1452 state. */
1453 if (bidi_it->scan_dir == 1
1454 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1455 bidi_it->next_for_neutral = next_for_neutral;
1456
1457 /* If resolved_level is -1, it means this state was cached
1458 before it was completely resolved, so we cannot return
1459 it. */
1460 if (bidi_it->resolved_level != -1)
1461 return bidi_it->resolved_level;
1462 }
1463 if (bidi_it->scan_dir == -1)
1464 /* If we are going backwards, the iterator state is already cached
1465 from previous scans, and should be fully resolved. */
1466 abort ();
1467
1468 if (type == UNKNOWN_BT)
1469 type = bidi_type_of_next_char (bidi_it);
1470
1471 if (type == NEUTRAL_B)
1472 return bidi_it->resolved_level;
1473
1474 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1475 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1476 || (type == WEAK_BN && prev_level == level))
1477 {
1478 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1479 abort ();
1480
1481 /* If the cached state shows a neutral character, it was not
1482 resolved by bidi_resolve_neutral, so do it now. */
1483 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1484 bidi_it->next_for_neutral.type,
1485 level);
1486 }
1487
1488 if (!(type == STRONG_R
1489 || type == STRONG_L
1490 || type == WEAK_BN
1491 || type == WEAK_EN
1492 || type == WEAK_AN))
1493 abort ();
1494 bidi_it->type = type;
2d6e4628 1495 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1496
1497 /* For L1 below, we need to know, for each WS character, whether
1498 it belongs to a sequence of WS characters preceeding a newline
1499 or a TAB or a paragraph separator. */
89d3374a 1500 if (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
1501 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1502 {
1503 int ch;
1504 int clen = bidi_it->ch_len;
6bff6497
EZ
1505 EMACS_INT bpos = bidi_it->bytepos;
1506 EMACS_INT cpos = bidi_it->charpos;
b7b65b15
EZ
1507 bidi_type_t chtype;
1508
1509 do {
1510 /*_fetch_multibyte_char_len = 1;*/
e7402cb2 1511 ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
b7b65b15
EZ
1512 bpos += clen;
1513 cpos++;
e7402cb2
EZ
1514 clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1515 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
b7b65b15
EZ
1516 chtype = NEUTRAL_B;
1517 else
6bff6497 1518 chtype = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15
EZ
1519 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1520 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1521 bidi_it->next_for_ws.type = chtype;
2d6e4628 1522 bidi_check_type (bidi_it->next_for_ws.type);
b7b65b15
EZ
1523 bidi_it->next_for_ws.charpos = cpos;
1524 bidi_it->next_for_ws.bytepos = bpos;
1525 }
1526
1527 /* Resolve implicit levels, with a twist: PDFs get the embedding
1528 level of the enbedding they terminate. See below for the
1529 reason. */
89d3374a 1530 if (bidi_it->orig_type == PDF
b7b65b15
EZ
1531 /* Don't do this if this formatting code didn't change the
1532 embedding level due to invalid or empty embeddings. */
1533 && prev_level != level)
1534 {
1535 /* Don't look in UAX#9 for the reason for this: it's our own
1536 private quirk. The reason is that we want the formatting
1537 codes to be delivered so that they bracket the text of their
1538 embedding. For example, given the text
1539
1540 {RLO}teST{PDF}
1541
1542 we want it to be displayed as
1543
1544 {RLO}STet{PDF}
1545
1546 not as
1547
1548 STet{RLO}{PDF}
1549
1550 which will result because we bump up the embedding level as
1551 soon as we see the RLO and pop it as soon as we see the PDF,
1552 so RLO itself has the same embedding level as "teST", and
1553 thus would be normally delivered last, just before the PDF.
1554 The switch below fiddles with the level of PDF so that this
1555 ugly side effect does not happen.
1556
1557 (This is, of course, only important if the formatting codes
e7402cb2
EZ
1558 are actually displayed, but Emacs does need to display them
1559 if the user wants to.) */
b7b65b15
EZ
1560 level = prev_level;
1561 }
89d3374a
EZ
1562 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1563 || bidi_it->orig_type == NEUTRAL_S
e7402cb2
EZ
1564 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1565 /* || bidi_it->ch == LINESEP_CHAR */
89d3374a 1566 || (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
1567 && (bidi_it->next_for_ws.type == NEUTRAL_B
1568 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1569 level = bidi_it->level_stack[0].level;
1570 else if ((level & 1) == 0) /* I1 */
1571 {
1572 if (type == STRONG_R)
1573 level++;
1574 else if (type == WEAK_EN || type == WEAK_AN)
1575 level += 2;
1576 }
1577 else /* I2 */
1578 {
1579 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1580 level++;
1581 }
1582
1583 bidi_it->resolved_level = level;
1584 return level;
1585}
1586
1587/* Move to the other edge of a level given by LEVEL. If END_FLAG is
1588 non-zero, we are at the end of a level, and we need to prepare to
1589 resume the scan of the lower level.
1590
1591 If this level's other edge is cached, we simply jump to it, filling
1592 the iterator structure with the iterator state on the other edge.
1593 Otherwise, we walk the buffer until we come back to the same level
1594 as LEVEL.
1595
1596 Note: we are not talking here about a ``level run'' in the UAX#9
1597 sense of the term, but rather about a ``level'' which includes
1598 all the levels higher than it. In other words, given the levels
1599 like this:
1600
1601 11111112222222333333334443343222222111111112223322111
1602 A B C
1603
1604 and assuming we are at point A scanning left to right, this
1605 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1606 at point B. */
1607static void
1608bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1609{
1610 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1611 int idx;
1612
1613 /* Try the cache first. */
1614 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1615 bidi_cache_fetch_state (idx, bidi_it);
1616 else
1617 {
1618 int new_level;
1619
1620 if (end_flag)
1621 abort (); /* if we are at end of level, its edges must be cached */
1622
1623 bidi_cache_iterator_state (bidi_it, 1);
1624 do {
1625 new_level = bidi_level_of_next_char (bidi_it);
1626 bidi_cache_iterator_state (bidi_it, 1);
1627 } while (new_level >= level);
1628 }
1629}
1630
1631void
4b292a22 1632bidi_move_to_visually_next (struct bidi_it *bidi_it)
b7b65b15
EZ
1633{
1634 int old_level, new_level, next_level;
9c82e145 1635 struct bidi_it sentinel;
b7b65b15
EZ
1636
1637 if (bidi_it->scan_dir == 0)
1638 {
1639 bidi_it->scan_dir = 1; /* default to logical order */
1640 }
1641
be39f003
EZ
1642 /* If we just passed a newline, initialize for the next line. */
1643 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1644 bidi_line_init (bidi_it);
1645
6dcfd253
EZ
1646 /* Prepare the sentinel iterator state, and cache it. When we bump
1647 into it, scanning backwards, we'll know that the last non-base
1648 level is exhausted. */
b7b65b15 1649 if (bidi_cache_idx == 0)
9c82e145
EZ
1650 {
1651 bidi_copy_it (&sentinel, bidi_it);
1652 if (bidi_it->first_elt)
1653 {
1654 sentinel.charpos--; /* cached charpos needs to be monotonic */
1655 sentinel.bytepos--;
1656 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1657 sentinel.ch_len = 1;
1658 }
6dcfd253 1659 bidi_cache_iterator_state (&sentinel, 1);
9c82e145 1660 }
b7b65b15
EZ
1661
1662 old_level = bidi_it->resolved_level;
1663 new_level = bidi_level_of_next_char (bidi_it);
b7b65b15
EZ
1664
1665 /* Reordering of resolved levels (clause L2) is implemented by
1666 jumping to the other edge of the level and flipping direction of
c0546589 1667 scanning the text whenever we find a level change. */
b7b65b15
EZ
1668 if (new_level != old_level)
1669 {
1670 int ascending = new_level > old_level;
1671 int level_to_search = ascending ? old_level + 1 : old_level;
1672 int incr = ascending ? 1 : -1;
1673 int expected_next_level = old_level + incr;
1674
b7b65b15
EZ
1675 /* Jump (or walk) to the other edge of this level. */
1676 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1677 /* Switch scan direction and peek at the next character in the
1678 new direction. */
1679 bidi_it->scan_dir = -bidi_it->scan_dir;
1680
1681 /* The following loop handles the case where the resolved level
1682 jumps by more than one. This is typical for numbers inside a
1683 run of text with left-to-right embedding direction, but can
1684 also happen in other situations. In those cases the decision
1685 where to continue after a level change, and in what direction,
1686 is tricky. For example, given a text like below:
1687
1688 abcdefgh
1689 11336622
1690
1691 (where the numbers below the text show the resolved levels),
1692 the result of reordering according to UAX#9 should be this:
1693
1694 efdcghba
1695
1696 This is implemented by the loop below which flips direction
1697 and jumps to the other edge of the level each time it finds
1698 the new level not to be the expected one. The expected level
1699 is always one more or one less than the previous one. */
1700 next_level = bidi_peek_at_next_level (bidi_it);
1701 while (next_level != expected_next_level)
1702 {
1703 expected_next_level += incr;
1704 level_to_search += incr;
1705 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1706 bidi_it->scan_dir = -bidi_it->scan_dir;
1707 next_level = bidi_peek_at_next_level (bidi_it);
1708 }
1709
1710 /* Finally, deliver the next character in the new direction. */
1711 next_level = bidi_level_of_next_char (bidi_it);
1712 }
1713
b44d9321
EZ
1714 /* Take note when we have just processed the newline that precedes
1715 the end of the paragraph. The next time we are about to be
1716 called, set_iterator_to_next will automatically reinit the
1717 paragraph direction, if needed. We do this at the newline before
1718 the paragraph separator, because the next character might not be
1719 the first character of the next paragraph, due to the bidi
c0546589
EZ
1720 reordering, whereas we _must_ know the paragraph base direction
1721 _before_ we process the paragraph's text, since the base
1722 direction affects the reordering. */
6bff6497 1723 if (bidi_it->scan_dir == 1
be39f003
EZ
1724 && bidi_it->orig_type == NEUTRAL_B
1725 && bidi_it->bytepos < ZV_BYTE)
1726 {
1727 EMACS_INT sep_len =
1728 bidi_at_paragraph_end (bidi_it->charpos + 1,
1729 bidi_it->bytepos + bidi_it->ch_len);
1730 if (sep_len >= 0)
1731 {
1732 bidi_it->new_paragraph = 1;
b44d9321
EZ
1733 /* Record the buffer position of the last character of the
1734 paragraph separator. */
1735 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
be39f003
EZ
1736 }
1737 }
6bff6497 1738
b7b65b15
EZ
1739 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1740 {
1741 /* If we are at paragraph's base embedding level and beyond the
1742 last cached position, the cache's job is done and we can
1743 discard it. */
1744 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1745 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1746 bidi_cache_reset ();
1747 /* But as long as we are caching during forward scan, we must
1748 cache each state, or else the cache integrity will be
1749 compromised: it assumes cached states correspond to buffer
1750 positions 1:1. */
1751 else
1752 bidi_cache_iterator_state (bidi_it, 1);
1753 }
1754}
1755
1756/* This is meant to be called from within the debugger, whenever you
1757 wish to examine the cache contents. */
1758void
1759bidi_dump_cached_states (void)
1760{
1761 int i;
1762 int ndigits = 1;
1763
1764 if (bidi_cache_idx == 0)
1765 {
1766 fprintf (stderr, "The cache is empty.\n");
1767 return;
1768 }
1769 fprintf (stderr, "Total of %d state%s in cache:\n",
1770 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1771
1772 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1773 ndigits++;
1774 fputs ("ch ", stderr);
1775 for (i = 0; i < bidi_cache_idx; i++)
1776 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1777 fputs ("\n", stderr);
1778 fputs ("lvl ", stderr);
1779 for (i = 0; i < bidi_cache_idx; i++)
1780 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1781 fputs ("\n", stderr);
1782 fputs ("pos ", stderr);
1783 for (i = 0; i < bidi_cache_idx; i++)
1784 fprintf (stderr, "%*d", ndigits, bidi_cache[i].charpos);
1785 fputs ("\n", stderr);
1786}