* lisp/minibuffer.el (completion-pcm--merge-completions): Don't merge "a1b"
[bpt/emacs.git] / src / bidi.c
CommitLineData
cbb09f04 1/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
73b0cd50 2 Copyright (C) 2000-2001, 2004-2005, 2009-2011
b118e65f 3 Free Software Foundation, Inc.
b7b65b15
EZ
4
5This file is part of GNU Emacs.
6
a8d11bd3 7GNU Emacs is free software: you can redistribute it and/or modify
b7b65b15 8it under the terms of the GNU General Public License as published by
a8d11bd3
EZ
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
b7b65b15
EZ
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
b7b65b15 17You should have received a copy of the GNU General Public License
a8d11bd3 18along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
b7b65b15 19
2d6e4628
EZ
20/* Written by Eli Zaretskii <eliz@gnu.org>.
21
22 A sequential implementation of the Unicode Bidirectional algorithm,
cbb09f04 23 (UBA) as per UAX#9, a part of the Unicode Standard.
b7b65b15
EZ
24
25 Unlike the reference and most other implementations, this one is
940afb59
EZ
26 designed to be called once for every character in the buffer or
27 string.
b7b65b15 28
4b292a22 29 The main entry point is bidi_move_to_visually_next. Each time it
b7b65b15
EZ
30 is called, it finds the next character in the visual order, and
31 returns its information in a special structure. The caller is then
32 expected to process this character for display or any other
4b292a22
EZ
33 purposes, and call bidi_move_to_visually_next for the next
34 character. See the comments in bidi_move_to_visually_next for more
35 details about its algorithm that finds the next visual-order
b7b65b15
EZ
36 character by resolving their levels on the fly.
37
95e1afc6 38 Two other entry points are bidi_paragraph_init and
940afb59
EZ
39 bidi_mirror_char. The first determines the base direction of a
40 paragraph, while the second returns the mirrored version of its
41 argument character.
42
95e1afc6
EZ
43 A few auxiliary entry points are used to initialize the bidi
44 iterator for iterating an object (buffer or string), push and pop
45 the bidi iterator state, and save and restore the state of the bidi
46 cache.
47
89d3374a
EZ
48 If you want to understand the code, you will have to read it
49 together with the relevant portions of UAX#9. The comments include
50 references to UAX#9 rules, for that very reason.
51
b7b65b15
EZ
52 A note about references to UAX#9 rules: if the reference says
53 something like "X9/Retaining", it means that you need to refer to
54 rule X9 and to its modifications decribed in the "Implementation
55 Notes" section of UAX#9, under "Retaining Format Codes". */
56
b7b65b15 57#include <config.h>
b7b65b15 58#include <stdio.h>
29e3d8d1
EZ
59#include <setjmp.h>
60
b7b65b15
EZ
61#include "lisp.h"
62#include "buffer.h"
63#include "character.h"
64#include "dispextern.h"
65
66static int bidi_initialized = 0;
67
cbc4fd20 68static Lisp_Object bidi_type_table, bidi_mirror_table;
b7b65b15
EZ
69
70#define LRM_CHAR 0x200E
71#define RLM_CHAR 0x200F
b7b65b15 72#define BIDI_EOB -1
b7b65b15 73
b7b65b15
EZ
74/* Data type for describing the bidirectional character categories. */
75typedef enum {
76 UNKNOWN_BC,
77 NEUTRAL,
78 WEAK,
79 STRONG
80} bidi_category_t;
81
32413314
EZ
82/* UAX#9 says to search only for L, AL, or R types of characters, and
83 ignore RLE, RLO, LRE, and LRO, when determining the base paragraph
84 level. Yudit indeed ignores them. This variable is therefore set
85 by default to ignore them, but setting it to zero will take them
86 into account. */
e78aecca 87extern int bidi_ignore_explicit_marks_for_paragraph_level EXTERNALLY_VISIBLE;
b7b65b15
EZ
88int bidi_ignore_explicit_marks_for_paragraph_level = 1;
89
372b7a95 90static Lisp_Object paragraph_start_re, paragraph_separate_re;
6bff6497 91static Lisp_Object Qparagraph_start, Qparagraph_separate;
b7b65b15 92
cbb09f04
EZ
93\f
94/***********************************************************************
95 Utilities
96 ***********************************************************************/
b7b65b15 97
6bff6497
EZ
98/* Return the bidi type of a character CH, subject to the current
99 directional OVERRIDE. */
55d4c1b2 100static inline bidi_type_t
6bff6497 101bidi_get_type (int ch, bidi_dir_t override)
b7b65b15 102{
6bff6497
EZ
103 bidi_type_t default_type;
104
e7402cb2
EZ
105 if (ch == BIDI_EOB)
106 return NEUTRAL_B;
e342a24d
EZ
107 if (ch < 0 || ch > MAX_CHAR)
108 abort ();
6bff6497
EZ
109
110 default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
111
112 if (override == NEUTRAL_DIR)
113 return default_type;
114
115 switch (default_type)
116 {
117 /* Although UAX#9 does not tell, it doesn't make sense to
118 override NEUTRAL_B and LRM/RLM characters. */
119 case NEUTRAL_B:
120 case LRE:
121 case LRO:
122 case RLE:
123 case RLO:
124 case PDF:
125 return default_type;
126 default:
127 switch (ch)
128 {
129 case LRM_CHAR:
130 case RLM_CHAR:
131 return default_type;
132 default:
133 if (override == L2R) /* X6 */
134 return STRONG_L;
135 else if (override == R2L)
136 return STRONG_R;
137 else
138 abort (); /* can't happen: handled above */
139 }
140 }
b7b65b15
EZ
141}
142
7d3b3862 143static void
2d6e4628
EZ
144bidi_check_type (bidi_type_t type)
145{
146 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
147 abort ();
148}
149
b7b65b15 150/* Given a bidi TYPE of a character, return its category. */
55d4c1b2 151static inline bidi_category_t
b7b65b15
EZ
152bidi_get_category (bidi_type_t type)
153{
154 switch (type)
155 {
156 case UNKNOWN_BT:
157 return UNKNOWN_BC;
158 case STRONG_L:
159 case STRONG_R:
160 case STRONG_AL:
161 case LRE:
162 case LRO:
163 case RLE:
164 case RLO:
165 return STRONG;
166 case PDF: /* ??? really?? */
167 case WEAK_EN:
168 case WEAK_ES:
169 case WEAK_ET:
170 case WEAK_AN:
171 case WEAK_CS:
172 case WEAK_NSM:
173 case WEAK_BN:
174 return WEAK;
175 case NEUTRAL_B:
176 case NEUTRAL_S:
177 case NEUTRAL_WS:
178 case NEUTRAL_ON:
179 return NEUTRAL;
180 default:
181 abort ();
182 }
183}
184
a6e8d97c
EZ
185/* Return the mirrored character of C, if it has one. If C has no
186 mirrored counterpart, return C.
187 Note: The conditions in UAX#9 clause L4 regarding the surrounding
188 context must be tested by the caller. */
b7b65b15
EZ
189int
190bidi_mirror_char (int c)
191{
cbc4fd20
EZ
192 Lisp_Object val;
193
194 if (c == BIDI_EOB)
195 return c;
196 if (c < 0 || c > MAX_CHAR)
197 abort ();
b7b65b15 198
cbc4fd20
EZ
199 val = CHAR_TABLE_REF (bidi_mirror_table, c);
200 if (INTEGERP (val))
b7b65b15 201 {
cbc4fd20 202 int v = XINT (val);
b7b65b15 203
cbc4fd20
EZ
204 if (v < 0 || v > MAX_CHAR)
205 abort ();
206
207 return v;
b7b65b15 208 }
cbc4fd20 209
b7b65b15
EZ
210 return c;
211}
212
cbb09f04
EZ
213/* Determine the start-of-run (sor) directional type given the two
214 embedding levels on either side of the run boundary. Also, update
215 the saved info about previously seen characters, since that info is
216 generally valid for a single level run. */
58b9f433 217static inline void
cbb09f04
EZ
218bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
219{
220 int higher_level = level_before > level_after ? level_before : level_after;
221
222 /* The prev_was_pdf gork is required for when we have several PDFs
223 in a row. In that case, we want to compute the sor type for the
224 next level run only once: when we see the first PDF. That's
225 because the sor type depends only on the higher of the two levels
226 that we find on the two sides of the level boundary (see UAX#9,
227 clause X10), and so we don't need to know the final embedding
228 level to which we descend after processing all the PDFs. */
229 if (!bidi_it->prev_was_pdf || level_before < level_after)
230 /* FIXME: should the default sor direction be user selectable? */
231 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
232 if (level_before > level_after)
233 bidi_it->prev_was_pdf = 1;
234
235 bidi_it->prev.type = UNKNOWN_BT;
236 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
237 bidi_it->last_strong.orig_type = UNKNOWN_BT;
238 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
239 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
240 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
241 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
242 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
243 bidi_it->ignore_bn_limit = -1; /* meaning it's unknown */
244}
245
246/* Push the current embedding level and override status; reset the
247 current level to LEVEL and the current override status to OVERRIDE. */
58b9f433 248static inline void
cbb09f04
EZ
249bidi_push_embedding_level (struct bidi_it *bidi_it,
250 int level, bidi_dir_t override)
251{
252 bidi_it->stack_idx++;
7e2ad32c 253 xassert (bidi_it->stack_idx < BIDI_MAXLEVEL);
cbb09f04
EZ
254 bidi_it->level_stack[bidi_it->stack_idx].level = level;
255 bidi_it->level_stack[bidi_it->stack_idx].override = override;
256}
257
258/* Pop the embedding level and directional override status from the
259 stack, and return the new level. */
58b9f433 260static inline int
cbb09f04
EZ
261bidi_pop_embedding_level (struct bidi_it *bidi_it)
262{
263 /* UAX#9 says to ignore invalid PDFs. */
264 if (bidi_it->stack_idx > 0)
265 bidi_it->stack_idx--;
266 return bidi_it->level_stack[bidi_it->stack_idx].level;
267}
268
269/* Record in SAVED_INFO the information about the current character. */
58b9f433 270static inline void
cbb09f04
EZ
271bidi_remember_char (struct bidi_saved_info *saved_info,
272 struct bidi_it *bidi_it)
273{
274 saved_info->charpos = bidi_it->charpos;
275 saved_info->bytepos = bidi_it->bytepos;
276 saved_info->type = bidi_it->type;
277 bidi_check_type (bidi_it->type);
278 saved_info->type_after_w1 = bidi_it->type_after_w1;
279 bidi_check_type (bidi_it->type_after_w1);
280 saved_info->orig_type = bidi_it->orig_type;
281 bidi_check_type (bidi_it->orig_type);
282}
283
b7b65b15
EZ
284/* Copy the bidi iterator from FROM to TO. To save cycles, this only
285 copies the part of the level stack that is actually in use. */
55d4c1b2 286static inline void
b7b65b15
EZ
287bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
288{
289 int i;
290
e69a9370 291 /* Copy everything except the level stack and beyond. */
182ce2d2 292 memcpy (to, from, offsetof (struct bidi_it, level_stack[0]));
b7b65b15
EZ
293
294 /* Copy the active part of the level stack. */
295 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
296 for (i = 1; i <= from->stack_idx; i++)
297 to->level_stack[i] = from->level_stack[i];
298}
299
cbb09f04
EZ
300\f
301/***********************************************************************
302 Caching the bidi iterator states
303 ***********************************************************************/
b7b65b15 304
2fe72643
EZ
305#define BIDI_CACHE_CHUNK 200
306static struct bidi_it *bidi_cache;
39e378da 307static ptrdiff_t bidi_cache_size = 0;
ad6042bb 308enum { elsz = sizeof (struct bidi_it) };
39e378da
PE
309static ptrdiff_t bidi_cache_idx; /* next unused cache slot */
310static ptrdiff_t bidi_cache_last_idx; /* slot of last cache hit */
311static ptrdiff_t bidi_cache_start = 0; /* start of cache for this
87e67904
EZ
312 "stack" level */
313
314/* Reset the cache state to the empty state. We only reset the part
315 of the cache relevant to iteration of the current object. Previous
316 objects, which are pushed on the display iterator's stack, are left
317 intact. This is called when the cached information is no more
318 useful for the current iteration, e.g. when we were reseated to a
319 new position on the same object. */
55d4c1b2 320static inline void
b7b65b15
EZ
321bidi_cache_reset (void)
322{
87e67904 323 bidi_cache_idx = bidi_cache_start;
b7b65b15
EZ
324 bidi_cache_last_idx = -1;
325}
326
87e67904
EZ
327/* Shrink the cache to its minimal size. Called when we init the bidi
328 iterator for reordering a buffer or a string that does not come
329 from display properties, because that means all the previously
330 cached info is of no further use. */
55d4c1b2 331static inline void
2fe72643
EZ
332bidi_cache_shrink (void)
333{
334 if (bidi_cache_size > BIDI_CACHE_CHUNK)
335 {
0416466c
EZ
336 bidi_cache_size = BIDI_CACHE_CHUNK;
337 bidi_cache =
338 (struct bidi_it *) xrealloc (bidi_cache, bidi_cache_size * elsz);
2fe72643
EZ
339 }
340 bidi_cache_reset ();
341}
342
55d4c1b2 343static inline void
39e378da 344bidi_cache_fetch_state (ptrdiff_t idx, struct bidi_it *bidi_it)
b7b65b15
EZ
345{
346 int current_scan_dir = bidi_it->scan_dir;
347
87e67904 348 if (idx < bidi_cache_start || idx >= bidi_cache_idx)
b7b65b15
EZ
349 abort ();
350
351 bidi_copy_it (bidi_it, &bidi_cache[idx]);
352 bidi_it->scan_dir = current_scan_dir;
353 bidi_cache_last_idx = idx;
354}
355
356/* Find a cached state with a given CHARPOS and resolved embedding
357 level less or equal to LEVEL. if LEVEL is -1, disregard the
358 resolved levels in cached states. DIR, if non-zero, means search
359 in that direction from the last cache hit. */
39e378da 360static inline ptrdiff_t
61bfec98 361bidi_cache_search (EMACS_INT charpos, int level, int dir)
b7b65b15 362{
39e378da 363 ptrdiff_t i, i_start;
b7b65b15 364
6eec7596 365 if (bidi_cache_idx > bidi_cache_start)
b7b65b15 366 {
6eec7596
EZ
367 if (bidi_cache_last_idx == -1)
368 bidi_cache_last_idx = bidi_cache_idx - 1;
b7b65b15 369 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
182ce2d2
EZ
370 {
371 dir = -1;
372 i_start = bidi_cache_last_idx - 1;
373 }
374 else if (charpos > (bidi_cache[bidi_cache_last_idx].charpos
375 + bidi_cache[bidi_cache_last_idx].nchars - 1))
376 {
377 dir = 1;
378 i_start = bidi_cache_last_idx + 1;
379 }
380 else if (dir)
b7b65b15
EZ
381 i_start = bidi_cache_last_idx;
382 else
383 {
384 dir = -1;
385 i_start = bidi_cache_idx - 1;
386 }
387
388 if (dir < 0)
389 {
390 /* Linear search for now; FIXME! */
87e67904 391 for (i = i_start; i >= bidi_cache_start; i--)
182ce2d2
EZ
392 if (bidi_cache[i].charpos <= charpos
393 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
b7b65b15
EZ
394 && (level == -1 || bidi_cache[i].resolved_level <= level))
395 return i;
396 }
397 else
398 {
399 for (i = i_start; i < bidi_cache_idx; i++)
182ce2d2
EZ
400 if (bidi_cache[i].charpos <= charpos
401 && charpos < bidi_cache[i].charpos + bidi_cache[i].nchars
b7b65b15
EZ
402 && (level == -1 || bidi_cache[i].resolved_level <= level))
403 return i;
404 }
405 }
406
407 return -1;
408}
409
410/* Find a cached state where the resolved level changes to a value
411 that is lower than LEVEL, and return its cache slot index. DIR is
412 the direction to search, starting with the last used cache slot.
87e67904
EZ
413 If DIR is zero, we search backwards from the last occupied cache
414 slot. BEFORE, if non-zero, means return the index of the slot that
415 is ``before'' the level change in the search direction. That is,
b7b65b15
EZ
416 given the cached levels like this:
417
418 1122333442211
419 AB C
420
421 and assuming we are at the position cached at the slot marked with
422 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
423 index of slot B or A, depending whether BEFORE is, respectively,
424 non-zero or zero. */
39e378da 425static ptrdiff_t
b7b65b15
EZ
426bidi_cache_find_level_change (int level, int dir, int before)
427{
428 if (bidi_cache_idx)
429 {
39e378da 430 ptrdiff_t i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
b7b65b15
EZ
431 int incr = before ? 1 : 0;
432
6eec7596
EZ
433 xassert (!dir || bidi_cache_last_idx >= 0);
434
b7b65b15
EZ
435 if (!dir)
436 dir = -1;
437 else if (!incr)
438 i += dir;
439
440 if (dir < 0)
441 {
87e67904 442 while (i >= bidi_cache_start + incr)
b7b65b15
EZ
443 {
444 if (bidi_cache[i - incr].resolved_level >= 0
445 && bidi_cache[i - incr].resolved_level < level)
446 return i;
447 i--;
448 }
449 }
450 else
451 {
452 while (i < bidi_cache_idx - incr)
453 {
454 if (bidi_cache[i + incr].resolved_level >= 0
455 && bidi_cache[i + incr].resolved_level < level)
456 return i;
457 i++;
458 }
459 }
460 }
461
462 return -1;
463}
464
58b9f433 465static inline void
39e378da 466bidi_cache_ensure_space (ptrdiff_t idx)
58b9f433
EZ
467{
468 /* Enlarge the cache as needed. */
469 if (idx >= bidi_cache_size)
470 {
39e378da 471 ptrdiff_t new_size;
f0eb61e9
PE
472
473 /* The bidi cache cannot be larger than the largest Lisp string
474 or buffer. */
475 ptrdiff_t string_or_buffer_bound =
476 max (BUF_BYTES_MAX, STRING_BYTES_BOUND);
477
478 /* Also, it cannot be larger than what C can represent. */
479 ptrdiff_t c_bound = min (PTRDIFF_MAX, SIZE_MAX) / elsz;
480
481 if (min (string_or_buffer_bound, c_bound) <= idx)
39e378da
PE
482 memory_full (SIZE_MAX);
483 new_size = idx - idx % BIDI_CACHE_CHUNK + BIDI_CACHE_CHUNK;
484 bidi_cache = (struct bidi_it *) xrealloc (bidi_cache, new_size * elsz);
485 bidi_cache_size = new_size;
58b9f433
EZ
486 }
487}
488
55d4c1b2 489static inline void
b7b65b15
EZ
490bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
491{
39e378da 492 ptrdiff_t idx;
b7b65b15
EZ
493
494 /* We should never cache on backward scans. */
495 if (bidi_it->scan_dir == -1)
496 abort ();
497 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
498
499 if (idx < 0)
500 {
501 idx = bidi_cache_idx;
58b9f433 502 bidi_cache_ensure_space (idx);
bd924a5d
EZ
503 /* Character positions should correspond to cache positions 1:1.
504 If we are outside the range of cached positions, the cache is
505 useless and must be reset. */
87e67904 506 if (idx > bidi_cache_start &&
182ce2d2
EZ
507 (bidi_it->charpos > (bidi_cache[idx - 1].charpos
508 + bidi_cache[idx - 1].nchars)
a1344e7d 509 || bidi_it->charpos < bidi_cache[bidi_cache_start].charpos))
bd924a5d
EZ
510 {
511 bidi_cache_reset ();
87e67904 512 idx = bidi_cache_start;
bd924a5d 513 }
102ebb00
EZ
514 if (bidi_it->nchars <= 0)
515 abort ();
b7b65b15
EZ
516 bidi_copy_it (&bidi_cache[idx], bidi_it);
517 if (!resolved)
518 bidi_cache[idx].resolved_level = -1;
519 }
520 else
521 {
522 /* Copy only the members which could have changed, to avoid
523 costly copying of the entire struct. */
524 bidi_cache[idx].type = bidi_it->type;
2d6e4628 525 bidi_check_type (bidi_it->type);
89d3374a
EZ
526 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
527 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
528 if (resolved)
529 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
530 else
531 bidi_cache[idx].resolved_level = -1;
532 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
533 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
534 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
535 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
536 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
537 }
538
539 bidi_cache_last_idx = idx;
540 if (idx >= bidi_cache_idx)
541 bidi_cache_idx = idx + 1;
542}
543
55d4c1b2 544static inline bidi_type_t
61bfec98 545bidi_cache_find (EMACS_INT charpos, int level, struct bidi_it *bidi_it)
b7b65b15 546{
39e378da 547 ptrdiff_t i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
b7b65b15 548
87e67904 549 if (i >= bidi_cache_start)
b7b65b15
EZ
550 {
551 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
552
5009f85e 553 bidi_copy_it (bidi_it, &bidi_cache[i]);
b7b65b15
EZ
554 bidi_cache_last_idx = i;
555 /* Don't let scan direction from from the cached state override
556 the current scan direction. */
557 bidi_it->scan_dir = current_scan_dir;
558 return bidi_it->type;
559 }
560
561 return UNKNOWN_BT;
562}
563
55d4c1b2 564static inline int
b7b65b15
EZ
565bidi_peek_at_next_level (struct bidi_it *bidi_it)
566{
87e67904 567 if (bidi_cache_idx == bidi_cache_start || bidi_cache_last_idx == -1)
b7b65b15
EZ
568 abort ();
569 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
570}
571
cbb09f04 572\f
58b9f433
EZ
573/***********************************************************************
574 Pushing and popping the bidi iterator state
575 ***********************************************************************/
0c22566f
EZ
576/* 5-slot stack for saving the start of the previous level of the
577 cache. xdisp.c maintains a 5-slot stack for its iterator state,
578 and we need the same size of our stack. */
39e378da 579static ptrdiff_t bidi_cache_start_stack[IT_STACK_SIZE];
58b9f433
EZ
580static int bidi_cache_sp;
581
582/* Push the bidi iterator state in preparation for reordering a
583 different object, e.g. display string found at certain buffer
7e2ad32c
EZ
584 position. Pushing the bidi iterator boils down to saving its
585 entire state on the cache and starting a new cache "stacked" on top
586 of the current cache. */
58b9f433
EZ
587void
588bidi_push_it (struct bidi_it *bidi_it)
b7b65b15 589{
58b9f433
EZ
590 /* Save the current iterator state in its entirety after the last
591 used cache slot. */
592 bidi_cache_ensure_space (bidi_cache_idx);
593 memcpy (&bidi_cache[bidi_cache_idx++], bidi_it, sizeof (struct bidi_it));
be39f003 594
58b9f433 595 /* Push the current cache start onto the stack. */
7e2ad32c 596 xassert (bidi_cache_sp < IT_STACK_SIZE);
58b9f433 597 bidi_cache_start_stack[bidi_cache_sp++] = bidi_cache_start;
be39f003 598
58b9f433
EZ
599 /* Start a new level of cache, and make it empty. */
600 bidi_cache_start = bidi_cache_idx;
601 bidi_cache_last_idx = -1;
602}
603
604/* Restore the iterator state saved by bidi_push_it and return the
605 cache to the corresponding state. */
606void
607bidi_pop_it (struct bidi_it *bidi_it)
608{
609 if (bidi_cache_start <= 0)
610 abort ();
611
612 /* Reset the next free cache slot index to what it was before the
613 call to bidi_push_it. */
614 bidi_cache_idx = bidi_cache_start - 1;
615
616 /* Restore the bidi iterator state saved in the cache. */
617 memcpy (bidi_it, &bidi_cache[bidi_cache_idx], sizeof (struct bidi_it));
618
619 /* Pop the previous cache start from the stack. */
620 if (bidi_cache_sp <= 0)
621 abort ();
622 bidi_cache_start = bidi_cache_start_stack[--bidi_cache_sp];
623
624 /* Invalidate the last-used cache slot data. */
625 bidi_cache_last_idx = -1;
626}
627
ec7cc85b 628static ptrdiff_t bidi_cache_total_alloc;
35928349 629
ed94e6d7
EZ
630/* Stash away a copy of the cache and its control variables. */
631void *
632bidi_shelve_cache (void)
633{
634 unsigned char *databuf;
635
35928349 636 /* Empty cache. */
ed94e6d7
EZ
637 if (bidi_cache_idx == 0)
638 return NULL;
639
640 databuf = xmalloc (sizeof (bidi_cache_idx)
641 + bidi_cache_idx * sizeof (struct bidi_it)
642 + sizeof (bidi_cache_start_stack)
643 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
644 + sizeof (bidi_cache_last_idx));
35928349
EZ
645 bidi_cache_total_alloc +=
646 sizeof (bidi_cache_idx) + bidi_cache_idx * sizeof (struct bidi_it)
647 + sizeof (bidi_cache_start_stack)
648 + sizeof (bidi_cache_sp) + sizeof (bidi_cache_start)
649 + sizeof (bidi_cache_last_idx);
650
ed94e6d7
EZ
651 memcpy (databuf, &bidi_cache_idx, sizeof (bidi_cache_idx));
652 memcpy (databuf + sizeof (bidi_cache_idx),
653 bidi_cache, bidi_cache_idx * sizeof (struct bidi_it));
654 memcpy (databuf + sizeof (bidi_cache_idx)
655 + bidi_cache_idx * sizeof (struct bidi_it),
656 bidi_cache_start_stack, sizeof (bidi_cache_start_stack));
657 memcpy (databuf + sizeof (bidi_cache_idx)
658 + bidi_cache_idx * sizeof (struct bidi_it)
659 + sizeof (bidi_cache_start_stack),
660 &bidi_cache_sp, sizeof (bidi_cache_sp));
661 memcpy (databuf + sizeof (bidi_cache_idx)
662 + bidi_cache_idx * sizeof (struct bidi_it)
663 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
664 &bidi_cache_start, sizeof (bidi_cache_start));
665 memcpy (databuf + sizeof (bidi_cache_idx)
666 + bidi_cache_idx * sizeof (struct bidi_it)
667 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
668 + sizeof (bidi_cache_start),
669 &bidi_cache_last_idx, sizeof (bidi_cache_last_idx));
670
671 return databuf;
672}
673
d1410150
EZ
674/* Restore the cache state from a copy stashed away by
675 bidi_shelve_cache, and free the buffer used to stash that copy.
676 JUST_FREE non-zero means free the buffer, but don't restore the
677 cache; used when the corresponding iterator is discarded instead of
678 being restored. */
ed94e6d7 679void
35928349 680bidi_unshelve_cache (void *databuf, int just_free)
ed94e6d7
EZ
681{
682 unsigned char *p = databuf;
683
684 if (!p)
be39f003 685 {
d1410150
EZ
686 if (!just_free)
687 {
688 /* A NULL pointer means an empty cache. */
689 bidi_cache_start = 0;
690 bidi_cache_sp = 0;
691 bidi_cache_reset ();
692 }
be39f003 693 }
ed94e6d7
EZ
694 else
695 {
35928349
EZ
696 if (just_free)
697 {
698 ptrdiff_t idx;
699
700 memcpy (&idx, p, sizeof (bidi_cache_idx));
701 bidi_cache_total_alloc -=
702 sizeof (bidi_cache_idx) + idx * sizeof (struct bidi_it)
703 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
704 + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx);
705 }
706 else
707 {
708 memcpy (&bidi_cache_idx, p, sizeof (bidi_cache_idx));
709 bidi_cache_ensure_space (bidi_cache_idx);
710 memcpy (bidi_cache, p + sizeof (bidi_cache_idx),
711 bidi_cache_idx * sizeof (struct bidi_it));
712 memcpy (bidi_cache_start_stack,
713 p + sizeof (bidi_cache_idx)
714 + bidi_cache_idx * sizeof (struct bidi_it),
715 sizeof (bidi_cache_start_stack));
716 memcpy (&bidi_cache_sp,
717 p + sizeof (bidi_cache_idx)
718 + bidi_cache_idx * sizeof (struct bidi_it)
719 + sizeof (bidi_cache_start_stack),
720 sizeof (bidi_cache_sp));
721 memcpy (&bidi_cache_start,
722 p + sizeof (bidi_cache_idx)
723 + bidi_cache_idx * sizeof (struct bidi_it)
724 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp),
725 sizeof (bidi_cache_start));
726 memcpy (&bidi_cache_last_idx,
727 p + sizeof (bidi_cache_idx)
728 + bidi_cache_idx * sizeof (struct bidi_it)
729 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
730 + sizeof (bidi_cache_start),
731 sizeof (bidi_cache_last_idx));
732 bidi_cache_total_alloc -=
733 sizeof (bidi_cache_idx) + bidi_cache_idx * sizeof (struct bidi_it)
734 + sizeof (bidi_cache_start_stack) + sizeof (bidi_cache_sp)
735 + sizeof (bidi_cache_start) + sizeof (bidi_cache_last_idx);
736 }
ed94e6d7
EZ
737
738 xfree (p);
739 }
740}
b7b65b15 741
58b9f433 742\f
cbb09f04
EZ
743/***********************************************************************
744 Initialization
745 ***********************************************************************/
746static void
747bidi_initialize (void)
b7b65b15 748{
be39f003 749
cbb09f04
EZ
750#include "biditype.h"
751#include "bidimirror.h"
be39f003 752
cbb09f04 753 int i;
b7b65b15 754
cbb09f04
EZ
755 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
756 staticpro (&bidi_type_table);
757
758 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
759 char_table_set_range (bidi_type_table, bidi_type[i].from, bidi_type[i].to,
760 make_number (bidi_type[i].type));
761
762 bidi_mirror_table = Fmake_char_table (Qnil, Qnil);
763 staticpro (&bidi_mirror_table);
764
765 for (i = 0; i < sizeof bidi_mirror / sizeof bidi_mirror[0]; i++)
766 char_table_set (bidi_mirror_table, bidi_mirror[i].from,
767 make_number (bidi_mirror[i].to));
768
769 Qparagraph_start = intern ("paragraph-start");
770 staticpro (&Qparagraph_start);
771 paragraph_start_re = Fsymbol_value (Qparagraph_start);
772 if (!STRINGP (paragraph_start_re))
773 paragraph_start_re = build_string ("\f\\|[ \t]*$");
774 staticpro (&paragraph_start_re);
775 Qparagraph_separate = intern ("paragraph-separate");
776 staticpro (&Qparagraph_separate);
777 paragraph_separate_re = Fsymbol_value (Qparagraph_separate);
778 if (!STRINGP (paragraph_separate_re))
779 paragraph_separate_re = build_string ("[ \t\f]*$");
780 staticpro (&paragraph_separate_re);
58b9f433
EZ
781
782 bidi_cache_sp = 0;
ec7cc85b 783 bidi_cache_total_alloc = 0;
58b9f433 784
cbb09f04 785 bidi_initialized = 1;
b7b65b15
EZ
786}
787
cbb09f04
EZ
788/* Do whatever UAX#9 clause X8 says should be done at paragraph's
789 end. */
55d4c1b2 790static inline void
cbb09f04 791bidi_set_paragraph_end (struct bidi_it *bidi_it)
b7b65b15 792{
cbb09f04
EZ
793 bidi_it->invalid_levels = 0;
794 bidi_it->invalid_rl_levels = -1;
795 bidi_it->stack_idx = 0;
796 bidi_it->resolved_level = bidi_it->level_stack[0].level;
797}
b7b65b15 798
cbb09f04
EZ
799/* Initialize the bidi iterator from buffer/string position CHARPOS. */
800void
801bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, int frame_window_p,
802 struct bidi_it *bidi_it)
803{
804 if (! bidi_initialized)
805 bidi_initialize ();
806 if (charpos >= 0)
807 bidi_it->charpos = charpos;
808 if (bytepos >= 0)
809 bidi_it->bytepos = bytepos;
810 bidi_it->frame_window_p = frame_window_p;
811 bidi_it->nchars = -1; /* to be computed in bidi_resolve_explicit_1 */
812 bidi_it->first_elt = 1;
813 bidi_set_paragraph_end (bidi_it);
814 bidi_it->new_paragraph = 1;
815 bidi_it->separator_limit = -1;
816 bidi_it->type = NEUTRAL_B;
817 bidi_it->type_after_w1 = NEUTRAL_B;
818 bidi_it->orig_type = NEUTRAL_B;
819 bidi_it->prev_was_pdf = 0;
820 bidi_it->prev.type = bidi_it->prev.type_after_w1 =
821 bidi_it->prev.orig_type = UNKNOWN_BT;
89d3374a
EZ
822 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
823 bidi_it->last_strong.orig_type = UNKNOWN_BT;
cbb09f04
EZ
824 bidi_it->next_for_neutral.charpos = -1;
825 bidi_it->next_for_neutral.type =
826 bidi_it->next_for_neutral.type_after_w1 =
89d3374a 827 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
cbb09f04
EZ
828 bidi_it->prev_for_neutral.charpos = -1;
829 bidi_it->prev_for_neutral.type =
830 bidi_it->prev_for_neutral.type_after_w1 =
831 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
832 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
833 bidi_it->disp_pos = -1; /* invalid/unknown */
55439c61 834 bidi_it->disp_prop_p = 0;
cbb09f04
EZ
835 /* We can only shrink the cache if we are at the bottom level of its
836 "stack". */
837 if (bidi_cache_start == 0)
838 bidi_cache_shrink ();
58b9f433
EZ
839 else
840 bidi_cache_reset ();
b7b65b15
EZ
841}
842
182ce2d2 843/* Perform initializations for reordering a new line of bidi text. */
6bff6497 844static void
be39f003
EZ
845bidi_line_init (struct bidi_it *bidi_it)
846{
847 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
848 bidi_it->resolved_level = bidi_it->level_stack[0].level;
849 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
850 bidi_it->invalid_levels = 0;
851 bidi_it->invalid_rl_levels = -1;
852 bidi_it->next_en_pos = -1;
853 bidi_it->next_for_ws.type = UNKNOWN_BT;
b44d9321
EZ
854 bidi_set_sor_type (bidi_it,
855 bidi_it->paragraph_dir == R2L ? 1 : 0,
be39f003
EZ
856 bidi_it->level_stack[0].level); /* X10 */
857
858 bidi_cache_reset ();
859}
860
cbb09f04
EZ
861\f
862/***********************************************************************
863 Fetching characters
864 ***********************************************************************/
865
f3014ef5
EZ
866/* Count bytes in string S between BEG/BEGBYTE and END. BEG and END
867 are zero-based character positions in S, BEGBYTE is byte position
868 corresponding to BEG. UNIBYTE, if non-zero, means S is a unibyte
869 string. */
87e67904
EZ
870static inline EMACS_INT
871bidi_count_bytes (const unsigned char *s, const EMACS_INT beg,
f3014ef5 872 const EMACS_INT begbyte, const EMACS_INT end, int unibyte)
87e67904
EZ
873{
874 EMACS_INT pos = beg;
875 const unsigned char *p = s + begbyte, *start = p;
876
f3014ef5
EZ
877 if (unibyte)
878 p = s + end;
879 else
87e67904 880 {
f3014ef5
EZ
881 if (!CHAR_HEAD_P (*p))
882 abort ();
883
884 while (pos < end)
885 {
886 p += BYTES_BY_CHAR_HEAD (*p);
887 pos++;
888 }
87e67904
EZ
889 }
890
891 return p - start;
892}
893
894/* Fetch and returns the character at byte position BYTEPOS. If S is
895 non-NULL, fetch the character from string S; otherwise fetch the
f3014ef5
EZ
896 character from the current buffer. UNIBYTE non-zero means S is a
897 unibyte string. */
87e67904 898static inline int
f3014ef5 899bidi_char_at_pos (EMACS_INT bytepos, const unsigned char *s, int unibyte)
87e67904
EZ
900{
901 if (s)
f3014ef5
EZ
902 {
903 if (unibyte)
904 return s[bytepos];
905 else
906 return STRING_CHAR (s + bytepos);
907 }
87e67904
EZ
908 else
909 return FETCH_MULTIBYTE_CHAR (bytepos);
910}
911
102ebb00
EZ
912/* Fetch and return the character at BYTEPOS/CHARPOS. If that
913 character is covered by a display string, treat the entire run of
914 covered characters as a single character u+FFFC, and return their
915 combined length in CH_LEN and NCHARS. DISP_POS specifies the
916 character position of the next display string, or -1 if not yet
55439c61
EZ
917 computed. DISP_PROP_P non-zero means that there's really a display
918 string at DISP_POS, as opposed to when we searched till DISP_POS
919 without findingone. When the next character is at or beyond that
920 position, the function updates DISP_POS with the position of the
921 next display string. STRING->s is the C string to iterate, or NULL
922 if iterating over a buffer or a Lisp string; in the latter case,
923 STRING->lstring is the Lisp string. */
fec2107c 924static inline int
102ebb00 925bidi_fetch_char (EMACS_INT bytepos, EMACS_INT charpos, EMACS_INT *disp_pos,
55439c61 926 int *disp_prop_p, struct bidi_string_data *string,
fc6f18ce 927 int frame_window_p, EMACS_INT *ch_len, EMACS_INT *nchars)
182ce2d2
EZ
928{
929 int ch;
9f257352
EZ
930 EMACS_INT endpos =
931 (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
6db161be 932 struct text_pos pos;
182ce2d2 933
182ce2d2 934 /* If we got past the last known position of display string, compute
87e67904
EZ
935 the position of the next one. That position could be at CHARPOS. */
936 if (charpos < endpos && charpos > *disp_pos)
6db161be
EZ
937 {
938 SET_TEXT_POS (pos, charpos, bytepos);
55439c61
EZ
939 *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
940 disp_prop_p);
6db161be 941 }
102ebb00
EZ
942
943 /* Fetch the character at BYTEPOS. */
87e67904 944 if (charpos >= endpos)
182ce2d2
EZ
945 {
946 ch = BIDI_EOB;
947 *ch_len = 1;
948 *nchars = 1;
87e67904 949 *disp_pos = endpos;
55439c61 950 *disp_prop_p = 0;
182ce2d2 951 }
55439c61 952 else if (charpos >= *disp_pos && *disp_prop_p)
182ce2d2 953 {
7b600102
EZ
954 EMACS_INT disp_end_pos;
955
956 /* We don't expect to find ourselves in the middle of a display
957 property. Hopefully, it will never be needed. */
958 if (charpos > *disp_pos)
959 abort ();
960 /* Return the Unicode Object Replacement Character to represent
87e67904 961 the entire run of characters covered by the display string. */
7b600102 962 ch = 0xFFFC;
87e67904 963 disp_end_pos = compute_display_string_end (*disp_pos, string);
7b600102 964 *nchars = disp_end_pos - *disp_pos;
f3014ef5
EZ
965 if (*nchars <= 0)
966 abort ();
87e67904
EZ
967 if (string->s)
968 *ch_len = bidi_count_bytes (string->s, *disp_pos, bytepos,
f3014ef5 969 disp_end_pos, string->unibyte);
9f257352
EZ
970 else if (STRINGP (string->lstring))
971 *ch_len = bidi_count_bytes (SDATA (string->lstring), *disp_pos,
f3014ef5 972 bytepos, disp_end_pos, string->unibyte);
87e67904
EZ
973 else
974 *ch_len = CHAR_TO_BYTE (disp_end_pos) - bytepos;
182ce2d2 975 }
182ce2d2
EZ
976 else
977 {
87e67904
EZ
978 if (string->s)
979 {
66ff7ddf 980 int len;
87e67904 981
f3014ef5
EZ
982 if (!string->unibyte)
983 {
984 ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
985 *ch_len = len;
986 }
987 else
988 {
989 ch = UNIBYTE_TO_CHAR (string->s[bytepos]);
990 *ch_len = 1;
991 }
87e67904 992 }
9f257352
EZ
993 else if (STRINGP (string->lstring))
994 {
66ff7ddf 995 int len;
9f257352 996
f3014ef5
EZ
997 if (!string->unibyte)
998 {
999 ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
1000 len);
1001 *ch_len = len;
1002 }
1003 else
1004 {
1005 ch = UNIBYTE_TO_CHAR (SREF (string->lstring, bytepos));
1006 *ch_len = 1;
1007 }
9f257352 1008 }
87e67904
EZ
1009 else
1010 {
1011 ch = FETCH_MULTIBYTE_CHAR (bytepos);
1012 *ch_len = CHAR_BYTES (ch);
1013 }
182ce2d2
EZ
1014 *nchars = 1;
1015 }
1016
1017 /* If we just entered a run of characters covered by a display
1018 string, compute the position of the next display string. */
55439c61
EZ
1019 if (charpos + *nchars <= endpos && charpos + *nchars > *disp_pos
1020 && *disp_prop_p)
6db161be
EZ
1021 {
1022 SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
55439c61
EZ
1023 *disp_pos = compute_display_string_pos (&pos, string, frame_window_p,
1024 disp_prop_p);
6db161be 1025 }
182ce2d2
EZ
1026
1027 return ch;
1028}
1029
cbb09f04
EZ
1030\f
1031/***********************************************************************
1032 Determining paragraph direction
1033 ***********************************************************************/
1034
1035/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
1036 Value is the non-negative length of the paragraph separator
1037 following the buffer position, -1 if position is at the beginning
1038 of a new paragraph, or -2 if position is neither at beginning nor
1039 at end of a paragraph. */
1040static EMACS_INT
1041bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
1042{
1043 Lisp_Object sep_re;
1044 Lisp_Object start_re;
1045 EMACS_INT val;
1046
1047 sep_re = paragraph_separate_re;
1048 start_re = paragraph_start_re;
1049
1050 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
1051 if (val < 0)
1052 {
1053 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
1054 val = -1;
1055 else
1056 val = -2;
1057 }
1058
1059 return val;
1060}
1061
be39f003
EZ
1062/* Find the beginning of this paragraph by looking back in the buffer.
1063 Value is the byte position of the paragraph's beginning. */
1064static EMACS_INT
b44d9321 1065bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
6bff6497 1066{
372b7a95 1067 Lisp_Object re = paragraph_start_re;
6bff6497
EZ
1068 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
1069
6bff6497
EZ
1070 while (pos_byte > BEGV_BYTE
1071 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
1072 {
182ce2d2
EZ
1073 /* FIXME: What if the paragraph beginning is covered by a
1074 display string? And what if a display string covering some
1075 of the text over which we scan back includes
1076 paragraph_start_re? */
be39f003
EZ
1077 pos = find_next_newline_no_quit (pos - 1, -1);
1078 pos_byte = CHAR_TO_BYTE (pos);
6bff6497 1079 }
be39f003 1080 return pos_byte;
6bff6497
EZ
1081}
1082
bea4f10c 1083/* Determine the base direction, a.k.a. base embedding level, of the
b44d9321
EZ
1084 paragraph we are about to iterate through. If DIR is either L2R or
1085 R2L, just use that. Otherwise, determine the paragraph direction
bea4f10c
EZ
1086 from the first strong directional character of the paragraph.
1087
182ce2d2 1088 NO_DEFAULT_P non-zero means don't default to L2R if the paragraph
bea4f10c
EZ
1089 has no strong directional characters and both DIR and
1090 bidi_it->paragraph_dir are NEUTRAL_DIR. In that case, search back
1091 in the buffer until a paragraph is found with a strong character,
1092 or until hitting BEGV. In the latter case, fall back to L2R. This
1093 flag is used in current-bidi-paragraph-direction.
1094
1095 Note that this function gives the paragraph separator the same
1096 direction as the preceding paragraph, even though Emacs generally
1097 views the separartor as not belonging to any paragraph. */
b7b65b15 1098void
bea4f10c 1099bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, int no_default_p)
b7b65b15 1100{
6bff6497 1101 EMACS_INT bytepos = bidi_it->bytepos;
9f257352 1102 int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
bea4f10c 1103 EMACS_INT pstartbyte;
87e67904
EZ
1104 /* Note that begbyte is a byte position, while end is a character
1105 position. Yes, this is ugly, but we are trying to avoid costly
1106 calls to BYTE_TO_CHAR and its ilk. */
1107 EMACS_INT begbyte = string_p ? 0 : BEGV_BYTE;
1108 EMACS_INT end = string_p ? bidi_it->string.schars : ZV;
e7402cb2 1109
5e65aec0 1110 /* Special case for an empty buffer. */
87e67904 1111 if (bytepos == begbyte && bidi_it->charpos == end)
5e65aec0 1112 dir = L2R;
9c82e145 1113 /* We should never be called at EOB or before BEGV. */
87e67904 1114 else if (bidi_it->charpos >= end || bytepos < begbyte)
9c82e145
EZ
1115 abort ();
1116
be39f003
EZ
1117 if (dir == L2R)
1118 {
1119 bidi_it->paragraph_dir = L2R;
1120 bidi_it->new_paragraph = 0;
1121 }
1122 else if (dir == R2L)
1123 {
1124 bidi_it->paragraph_dir = R2L;
1125 bidi_it->new_paragraph = 0;
1126 }
b7b65b15
EZ
1127 else if (dir == NEUTRAL_DIR) /* P2 */
1128 {
182ce2d2
EZ
1129 int ch;
1130 EMACS_INT ch_len, nchars;
1131 EMACS_INT pos, disp_pos = -1;
55439c61 1132 int disp_prop_p = 0;
6bff6497 1133 bidi_type_t type;
9f257352 1134 const unsigned char *s;
be39f003 1135
d20e1419
EZ
1136 if (!bidi_initialized)
1137 bidi_initialize ();
1138
be39f003
EZ
1139 /* If we are inside a paragraph separator, we are just waiting
1140 for the separator to be exhausted; use the previous paragraph
e5a2fec7
EZ
1141 direction. But don't do that if we have been just reseated,
1142 because we need to reinitialize below in that case. */
1143 if (!bidi_it->first_elt
1144 && bidi_it->charpos < bidi_it->separator_limit)
be39f003
EZ
1145 return;
1146
b44d9321 1147 /* If we are on a newline, get past it to where the next
5e65aec0
EZ
1148 paragraph might start. But don't do that at BEGV since then
1149 we are potentially in a new paragraph that doesn't yet
1150 exist. */
c143c213 1151 pos = bidi_it->charpos;
9f257352
EZ
1152 s = STRINGP (bidi_it->string.lstring) ?
1153 SDATA (bidi_it->string.lstring) : bidi_it->string.s;
f3014ef5
EZ
1154 if (bytepos > begbyte
1155 && bidi_char_at_pos (bytepos, s, bidi_it->string.unibyte) == '\n')
be39f003 1156 {
b44d9321 1157 bytepos++;
c143c213 1158 pos++;
be39f003 1159 }
b44d9321
EZ
1160
1161 /* We are either at the beginning of a paragraph or in the
1162 middle of it. Find where this paragraph starts. */
87e67904
EZ
1163 if (string_p)
1164 {
1165 /* We don't support changes of paragraph direction inside a
1166 string. It is treated as a single paragraph. */
1167 pstartbyte = 0;
1168 }
1169 else
1170 pstartbyte = bidi_find_paragraph_start (pos, bytepos);
be39f003
EZ
1171 bidi_it->separator_limit = -1;
1172 bidi_it->new_paragraph = 0;
bea4f10c
EZ
1173
1174 /* The following loop is run more than once only if NO_DEFAULT_P
87e67904 1175 is non-zero, and only if we are iterating on a buffer. */
bea4f10c
EZ
1176 do {
1177 bytepos = pstartbyte;
87e67904
EZ
1178 if (!string_p)
1179 pos = BYTE_TO_CHAR (bytepos);
55439c61
EZ
1180 ch = bidi_fetch_char (bytepos, pos, &disp_pos, &disp_prop_p,
1181 &bidi_it->string,
87e67904 1182 bidi_it->frame_window_p, &ch_len, &nchars);
bea4f10c
EZ
1183 type = bidi_get_type (ch, NEUTRAL_DIR);
1184
182ce2d2 1185 for (pos += nchars, bytepos += ch_len;
bea4f10c
EZ
1186 (bidi_get_category (type) != STRONG)
1187 || (bidi_ignore_explicit_marks_for_paragraph_level
1188 && (type == RLE || type == RLO
1189 || type == LRE || type == LRO));
1190 type = bidi_get_type (ch, NEUTRAL_DIR))
1191 {
87e67904 1192 if (pos >= end)
bea4f10c
EZ
1193 {
1194 /* Pretend there's a paragraph separator at end of
87e67904 1195 buffer/string. */
bea4f10c
EZ
1196 type = NEUTRAL_B;
1197 break;
1198 }
0bb23927
EZ
1199 if (!string_p
1200 && type == NEUTRAL_B
1201 && bidi_at_paragraph_end (pos, bytepos) >= -1)
cf99dcf8 1202 break;
102ebb00 1203 /* Fetch next character and advance to get past it. */
55439c61
EZ
1204 ch = bidi_fetch_char (bytepos, pos, &disp_pos,
1205 &disp_prop_p, &bidi_it->string,
fc6f18ce 1206 bidi_it->frame_window_p, &ch_len, &nchars);
182ce2d2
EZ
1207 pos += nchars;
1208 bytepos += ch_len;
bea4f10c 1209 }
32413314
EZ
1210 if ((type == STRONG_R || type == STRONG_AL) /* P3 */
1211 || (!bidi_ignore_explicit_marks_for_paragraph_level
1212 && (type == RLO || type == RLE)))
bea4f10c 1213 bidi_it->paragraph_dir = R2L;
32413314
EZ
1214 else if (type == STRONG_L
1215 || (!bidi_ignore_explicit_marks_for_paragraph_level
1216 && (type == LRO || type == LRE)))
bea4f10c 1217 bidi_it->paragraph_dir = L2R;
87e67904
EZ
1218 if (!string_p
1219 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR)
bea4f10c
EZ
1220 {
1221 /* If this paragraph is at BEGV, default to L2R. */
1222 if (pstartbyte == BEGV_BYTE)
1223 bidi_it->paragraph_dir = L2R; /* P3 and HL1 */
1224 else
1225 {
1226 EMACS_INT prevpbyte = pstartbyte;
1227 EMACS_INT p = BYTE_TO_CHAR (pstartbyte), pbyte = pstartbyte;
1228
1229 /* Find the beginning of the previous paragraph, if any. */
1230 while (pbyte > BEGV_BYTE && prevpbyte >= pstartbyte)
1231 {
182ce2d2
EZ
1232 /* FXIME: What if p is covered by a display
1233 string? See also a FIXME inside
1234 bidi_find_paragraph_start. */
bea4f10c
EZ
1235 p--;
1236 pbyte = CHAR_TO_BYTE (p);
1237 prevpbyte = bidi_find_paragraph_start (p, pbyte);
1238 }
1239 pstartbyte = prevpbyte;
1240 }
1241 }
87e67904
EZ
1242 } while (!string_p
1243 && no_default_p && bidi_it->paragraph_dir == NEUTRAL_DIR);
b7b65b15 1244 }
be39f003
EZ
1245 else
1246 abort ();
b7b65b15 1247
b44d9321
EZ
1248 /* Contrary to UAX#9 clause P3, we only default the paragraph
1249 direction to L2R if we have no previous usable paragraph
bea4f10c 1250 direction. This is allowed by the HL1 clause. */
d20e1419 1251 if (bidi_it->paragraph_dir != L2R && bidi_it->paragraph_dir != R2L)
bea4f10c 1252 bidi_it->paragraph_dir = L2R; /* P3 and HL1 ``higher-level protocols'' */
be39f003 1253 if (bidi_it->paragraph_dir == R2L)
b44d9321 1254 bidi_it->level_stack[0].level = 1;
be39f003 1255 else
b44d9321 1256 bidi_it->level_stack[0].level = 0;
be39f003
EZ
1257
1258 bidi_line_init (bidi_it);
b7b65b15
EZ
1259}
1260
cbb09f04
EZ
1261\f
1262/***********************************************************************
1263 Resolving explicit and implicit levels.
58b9f433 1264 The rest of this file constitutes the core of the UBA implementation.
cbb09f04 1265 ***********************************************************************/
b7b65b15 1266
55d4c1b2 1267static inline int
182ce2d2 1268bidi_explicit_dir_char (int ch)
b7b65b15 1269{
182ce2d2
EZ
1270 bidi_type_t ch_type;
1271
1272 if (!bidi_initialized)
1273 abort ();
1274 ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
1275 return (ch_type == LRE || ch_type == LRO
1276 || ch_type == RLE || ch_type == RLO
1277 || ch_type == PDF);
b7b65b15
EZ
1278}
1279
1280/* A helper function for bidi_resolve_explicit. It advances to the
1281 next character in logical order and determines the new embedding
1282 level and directional override, but does not take into account
1283 empty embeddings. */
1284static int
1285bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
1286{
1287 int curchar;
1288 bidi_type_t type;
1289 int current_level;
1290 int new_level;
1291 bidi_dir_t override;
9f257352 1292 int string_p = bidi_it->string.s != NULL || STRINGP (bidi_it->string.lstring);
b7b65b15 1293
182ce2d2
EZ
1294 /* If reseat()'ed, don't advance, so as to start iteration from the
1295 position where we were reseated. bidi_it->bytepos can be less
1296 than BEGV_BYTE after reseat to BEGV. */
87e67904 1297 if (bidi_it->bytepos < (string_p ? 0 : BEGV_BYTE)
9c82e145 1298 || bidi_it->first_elt)
e7402cb2 1299 {
9c82e145 1300 bidi_it->first_elt = 0;
87e67904
EZ
1301 if (string_p)
1302 {
9f257352
EZ
1303 const unsigned char *p =
1304 STRINGP (bidi_it->string.lstring)
1305 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1306
87e67904
EZ
1307 if (bidi_it->charpos < 0)
1308 bidi_it->charpos = 0;
f3014ef5
EZ
1309 bidi_it->bytepos = bidi_count_bytes (p, 0, 0, bidi_it->charpos,
1310 bidi_it->string.unibyte);
87e67904
EZ
1311 }
1312 else
1313 {
1314 if (bidi_it->charpos < BEGV)
1315 bidi_it->charpos = BEGV;
1316 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
1317 }
e7402cb2 1318 }
87e67904
EZ
1319 /* Don't move at end of buffer/string. */
1320 else if (bidi_it->charpos < (string_p ? bidi_it->string.schars : ZV))
b7b65b15 1321 {
182ce2d2
EZ
1322 /* Advance to the next character, skipping characters covered by
1323 display strings (nchars > 1). */
102ebb00
EZ
1324 if (bidi_it->nchars <= 0)
1325 abort ();
182ce2d2 1326 bidi_it->charpos += bidi_it->nchars;
e7402cb2
EZ
1327 if (bidi_it->ch_len == 0)
1328 abort ();
b7b65b15
EZ
1329 bidi_it->bytepos += bidi_it->ch_len;
1330 }
1331
1332 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
1333 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1334 new_level = current_level;
1335
87e67904 1336 if (bidi_it->charpos >= (string_p ? bidi_it->string.schars : ZV))
e7402cb2
EZ
1337 {
1338 curchar = BIDI_EOB;
1339 bidi_it->ch_len = 1;
182ce2d2 1340 bidi_it->nchars = 1;
87e67904 1341 bidi_it->disp_pos = (string_p ? bidi_it->string.schars : ZV);
55439c61 1342 bidi_it->disp_prop_p = 0;
e7402cb2
EZ
1343 }
1344 else
1345 {
182ce2d2
EZ
1346 /* Fetch the character at BYTEPOS. If it is covered by a
1347 display string, treat the entire run of covered characters as
1348 a single character u+FFFC. */
102ebb00 1349 curchar = bidi_fetch_char (bidi_it->bytepos, bidi_it->charpos,
55439c61
EZ
1350 &bidi_it->disp_pos, &bidi_it->disp_prop_p,
1351 &bidi_it->string, bidi_it->frame_window_p,
102ebb00 1352 &bidi_it->ch_len, &bidi_it->nchars);
e7402cb2 1353 }
b7b65b15 1354 bidi_it->ch = curchar;
b7b65b15 1355
6bff6497
EZ
1356 /* Don't apply directional override here, as all the types we handle
1357 below will not be affected by the override anyway, and we need
1358 the original type unaltered. The override will be applied in
1359 bidi_resolve_weak. */
1360 type = bidi_get_type (curchar, NEUTRAL_DIR);
89d3374a
EZ
1361 bidi_it->orig_type = type;
1362 bidi_check_type (bidi_it->orig_type);
b7b65b15
EZ
1363
1364 if (type != PDF)
1365 bidi_it->prev_was_pdf = 0;
1366
89d3374a 1367 bidi_it->type_after_w1 = UNKNOWN_BT;
b7b65b15
EZ
1368
1369 switch (type)
1370 {
1371 case RLE: /* X2 */
1372 case RLO: /* X4 */
89d3374a
EZ
1373 bidi_it->type_after_w1 = type;
1374 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 1375 type = WEAK_BN; /* X9/Retaining */
87e67904 1376 if (bidi_it->ignore_bn_limit <= -1)
b7b65b15
EZ
1377 {
1378 if (current_level <= BIDI_MAXLEVEL - 4)
1379 {
1380 /* Compute the least odd embedding level greater than
1381 the current level. */
1382 new_level = ((current_level + 1) & ~1) + 1;
89d3374a 1383 if (bidi_it->type_after_w1 == RLE)
b7b65b15
EZ
1384 override = NEUTRAL_DIR;
1385 else
1386 override = R2L;
1387 if (current_level == BIDI_MAXLEVEL - 4)
1388 bidi_it->invalid_rl_levels = 0;
1389 bidi_push_embedding_level (bidi_it, new_level, override);
1390 }
1391 else
1392 {
1393 bidi_it->invalid_levels++;
1394 /* See the commentary about invalid_rl_levels below. */
1395 if (bidi_it->invalid_rl_levels < 0)
1396 bidi_it->invalid_rl_levels = 0;
1397 bidi_it->invalid_rl_levels++;
1398 }
1399 }
89d3374a 1400 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
1401 || bidi_it->next_en_pos > bidi_it->charpos)
1402 type = WEAK_EN;
1403 break;
1404 case LRE: /* X3 */
1405 case LRO: /* X5 */
89d3374a
EZ
1406 bidi_it->type_after_w1 = type;
1407 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 1408 type = WEAK_BN; /* X9/Retaining */
87e67904 1409 if (bidi_it->ignore_bn_limit <= -1)
b7b65b15
EZ
1410 {
1411 if (current_level <= BIDI_MAXLEVEL - 5)
1412 {
1413 /* Compute the least even embedding level greater than
1414 the current level. */
1415 new_level = ((current_level + 2) & ~1);
89d3374a 1416 if (bidi_it->type_after_w1 == LRE)
b7b65b15
EZ
1417 override = NEUTRAL_DIR;
1418 else
1419 override = L2R;
1420 bidi_push_embedding_level (bidi_it, new_level, override);
1421 }
1422 else
1423 {
1424 bidi_it->invalid_levels++;
1425 /* invalid_rl_levels counts invalid levels encountered
1426 while the embedding level was already too high for
1427 LRE/LRO, but not for RLE/RLO. That is because
1428 there may be exactly one PDF which we should not
1429 ignore even though invalid_levels is non-zero.
1430 invalid_rl_levels helps to know what PDF is
1431 that. */
1432 if (bidi_it->invalid_rl_levels >= 0)
1433 bidi_it->invalid_rl_levels++;
1434 }
1435 }
89d3374a 1436 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
1437 || bidi_it->next_en_pos > bidi_it->charpos)
1438 type = WEAK_EN;
1439 break;
1440 case PDF: /* X7 */
89d3374a
EZ
1441 bidi_it->type_after_w1 = type;
1442 bidi_check_type (bidi_it->type_after_w1);
b7b65b15 1443 type = WEAK_BN; /* X9/Retaining */
87e67904 1444 if (bidi_it->ignore_bn_limit <= -1)
b7b65b15
EZ
1445 {
1446 if (!bidi_it->invalid_rl_levels)
1447 {
1448 new_level = bidi_pop_embedding_level (bidi_it);
1449 bidi_it->invalid_rl_levels = -1;
1450 if (bidi_it->invalid_levels)
1451 bidi_it->invalid_levels--;
1452 /* else nothing: UAX#9 says to ignore invalid PDFs */
1453 }
1454 if (!bidi_it->invalid_levels)
1455 new_level = bidi_pop_embedding_level (bidi_it);
1456 else
1457 {
1458 bidi_it->invalid_levels--;
1459 bidi_it->invalid_rl_levels--;
1460 }
1461 }
89d3374a 1462 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
b7b65b15
EZ
1463 || bidi_it->next_en_pos > bidi_it->charpos)
1464 type = WEAK_EN;
1465 break;
1466 default:
1467 /* Nothing. */
1468 break;
1469 }
1470
1471 bidi_it->type = type;
2d6e4628 1472 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1473
1474 return new_level;
1475}
1476
1477/* Given an iterator state in BIDI_IT, advance one character position
182ce2d2
EZ
1478 in the buffer/string to the next character (in the logical order),
1479 resolve any explicit embeddings and directional overrides, and
1480 return the embedding level of the character after resolving
1481 explicit directives and ignoring empty embeddings. */
b7b65b15
EZ
1482static int
1483bidi_resolve_explicit (struct bidi_it *bidi_it)
1484{
1485 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1486 int new_level = bidi_resolve_explicit_1 (bidi_it);
87e67904 1487 EMACS_INT eob = bidi_it->string.s ? bidi_it->string.schars : ZV;
9f257352
EZ
1488 const unsigned char *s = STRINGP (bidi_it->string.lstring)
1489 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
b7b65b15
EZ
1490
1491 if (prev_level < new_level
1492 && bidi_it->type == WEAK_BN
87e67904
EZ
1493 && bidi_it->ignore_bn_limit == -1 /* only if not already known */
1494 && bidi_it->charpos < eob /* not already at EOB */
1495 && bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
f3014ef5
EZ
1496 + bidi_it->ch_len, s,
1497 bidi_it->string.unibyte)))
b7b65b15
EZ
1498 {
1499 /* Avoid pushing and popping embedding levels if the level run
1500 is empty, as this breaks level runs where it shouldn't.
1501 UAX#9 removes all the explicit embedding and override codes,
1502 so empty embeddings disappear without a trace. We need to
1503 behave as if we did the same. */
1504 struct bidi_it saved_it;
1505 int level = prev_level;
1506
1507 bidi_copy_it (&saved_it, bidi_it);
1508
87e67904 1509 while (bidi_explicit_dir_char (bidi_char_at_pos (bidi_it->bytepos
f3014ef5
EZ
1510 + bidi_it->ch_len, s,
1511 bidi_it->string.unibyte)))
b7b65b15 1512 {
182ce2d2
EZ
1513 /* This advances to the next character, skipping any
1514 characters covered by display strings. */
b7b65b15 1515 level = bidi_resolve_explicit_1 (bidi_it);
9f257352
EZ
1516 /* If string.lstring was relocated inside bidi_resolve_explicit_1,
1517 a pointer to its data is no longer valid. */
1518 if (STRINGP (bidi_it->string.lstring))
1519 s = SDATA (bidi_it->string.lstring);
b7b65b15
EZ
1520 }
1521
102ebb00
EZ
1522 if (bidi_it->nchars <= 0)
1523 abort ();
b7b65b15 1524 if (level == prev_level) /* empty embedding */
182ce2d2 1525 saved_it.ignore_bn_limit = bidi_it->charpos + bidi_it->nchars;
b7b65b15 1526 else /* this embedding is non-empty */
87e67904 1527 saved_it.ignore_bn_limit = -2;
b7b65b15
EZ
1528
1529 bidi_copy_it (bidi_it, &saved_it);
87e67904 1530 if (bidi_it->ignore_bn_limit > -1)
b7b65b15
EZ
1531 {
1532 /* We pushed a level, but we shouldn't have. Undo that. */
1533 if (!bidi_it->invalid_rl_levels)
1534 {
1535 new_level = bidi_pop_embedding_level (bidi_it);
1536 bidi_it->invalid_rl_levels = -1;
1537 if (bidi_it->invalid_levels)
1538 bidi_it->invalid_levels--;
1539 }
1540 if (!bidi_it->invalid_levels)
1541 new_level = bidi_pop_embedding_level (bidi_it);
1542 else
1543 {
1544 bidi_it->invalid_levels--;
1545 bidi_it->invalid_rl_levels--;
1546 }
1547 }
1548 }
1549
b7b65b15
EZ
1550 if (bidi_it->type == NEUTRAL_B) /* X8 */
1551 {
21fce5ab 1552 bidi_set_paragraph_end (bidi_it);
6bff6497
EZ
1553 /* This is needed by bidi_resolve_weak below, and in L1. */
1554 bidi_it->type_after_w1 = bidi_it->type;
89d3374a 1555 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1556 }
1557
1558 return new_level;
1559}
1560
182ce2d2
EZ
1561/* Advance in the buffer/string, resolve weak types and return the
1562 type of the next character after weak type resolution. */
fd3998ff 1563static bidi_type_t
b7b65b15
EZ
1564bidi_resolve_weak (struct bidi_it *bidi_it)
1565{
1566 bidi_type_t type;
1567 bidi_dir_t override;
1568 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1569 int new_level = bidi_resolve_explicit (bidi_it);
1570 int next_char;
1571 bidi_type_t type_of_next;
1572 struct bidi_it saved_it;
9f257352
EZ
1573 EMACS_INT eob =
1574 (STRINGP (bidi_it->string.lstring) || bidi_it->string.s)
1575 ? bidi_it->string.schars : ZV;
b7b65b15
EZ
1576
1577 type = bidi_it->type;
1578 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1579
1580 if (type == UNKNOWN_BT
1581 || type == LRE
1582 || type == LRO
1583 || type == RLE
1584 || type == RLO
1585 || type == PDF)
1586 abort ();
1587
1588 if (new_level != prev_level
1589 || bidi_it->type == NEUTRAL_B)
1590 {
1591 /* We've got a new embedding level run, compute the directional
1592 type of sor and initialize per-run variables (UAX#9, clause
1593 X10). */
1594 bidi_set_sor_type (bidi_it, prev_level, new_level);
1595 }
1596 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1597 || type == WEAK_BN || type == STRONG_AL)
89d3374a
EZ
1598 bidi_it->type_after_w1 = type; /* needed in L1 */
1599 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1600
1601 /* Level and directional override status are already recorded in
1602 bidi_it, and do not need any change; see X6. */
1603 if (override == R2L) /* X6 */
1604 type = STRONG_R;
1605 else if (override == L2R)
1606 type = STRONG_L;
bc5a45f3 1607 else
b7b65b15 1608 {
bc5a45f3 1609 if (type == WEAK_NSM) /* W1 */
b7b65b15 1610 {
bc5a45f3 1611 /* Note that we don't need to consider the case where the
5930fe97
EZ
1612 prev character has its type overridden by an RLO or LRO,
1613 because then either the type of this NSM would have been
1614 also overridden, or the previous character is outside the
1615 current level run, and thus not relevant to this NSM.
1616 This is why NSM gets the type_after_w1 of the previous
1617 character. */
ebb5722e
EZ
1618 if (bidi_it->prev.type_after_w1 != UNKNOWN_BT
1619 /* if type_after_w1 is NEUTRAL_B, this NSM is at sor */
1620 && bidi_it->prev.type_after_w1 != NEUTRAL_B)
5930fe97 1621 type = bidi_it->prev.type_after_w1;
bc5a45f3
EZ
1622 else if (bidi_it->sor == R2L)
1623 type = STRONG_R;
1624 else if (bidi_it->sor == L2R)
1625 type = STRONG_L;
1626 else /* shouldn't happen! */
1627 abort ();
b7b65b15 1628 }
bc5a45f3
EZ
1629 if (type == WEAK_EN /* W2 */
1630 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1631 type = WEAK_AN;
1632 else if (type == STRONG_AL) /* W3 */
1633 type = STRONG_R;
1634 else if ((type == WEAK_ES /* W4 */
1635 && bidi_it->prev.type_after_w1 == WEAK_EN
1636 && bidi_it->prev.orig_type == WEAK_EN)
1637 || (type == WEAK_CS
1638 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1639 && bidi_it->prev.orig_type == WEAK_EN)
1640 || bidi_it->prev.type_after_w1 == WEAK_AN)))
b7b65b15 1641 {
9f257352
EZ
1642 const unsigned char *s =
1643 STRINGP (bidi_it->string.lstring)
1644 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
1645
e7402cb2 1646 next_char =
87e67904
EZ
1647 bidi_it->charpos + bidi_it->nchars >= eob
1648 ? BIDI_EOB
f3014ef5
EZ
1649 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1650 bidi_it->string.unibyte);
6bff6497 1651 type_of_next = bidi_get_type (next_char, override);
b7b65b15 1652
bc5a45f3 1653 if (type_of_next == WEAK_BN
b7b65b15
EZ
1654 || bidi_explicit_dir_char (next_char))
1655 {
1656 bidi_copy_it (&saved_it, bidi_it);
1657 while (bidi_resolve_explicit (bidi_it) == new_level
bc5a45f3 1658 && bidi_it->type == WEAK_BN)
b7b65b15
EZ
1659 ;
1660 type_of_next = bidi_it->type;
b7b65b15
EZ
1661 bidi_copy_it (bidi_it, &saved_it);
1662 }
bc5a45f3
EZ
1663
1664 /* If the next character is EN, but the last strong-type
1665 character is AL, that next EN will be changed to AN when
1666 we process it in W2 above. So in that case, this ES
1667 should not be changed into EN. */
1668 if (type == WEAK_ES
1669 && type_of_next == WEAK_EN
1670 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1671 type = WEAK_EN;
1672 else if (type == WEAK_CS)
b7b65b15 1673 {
bc5a45f3
EZ
1674 if (bidi_it->prev.type_after_w1 == WEAK_AN
1675 && (type_of_next == WEAK_AN
1676 /* If the next character is EN, but the last
1677 strong-type character is AL, EN will be later
1678 changed to AN when we process it in W2 above.
1679 So in that case, this ES should not be
1680 changed into EN. */
1681 || (type_of_next == WEAK_EN
1682 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1683 type = WEAK_AN;
1684 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1685 && type_of_next == WEAK_EN
1686 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1687 type = WEAK_EN;
1688 }
1689 }
1690 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1691 || type == WEAK_BN) /* W5/Retaining */
1692 {
1693 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */
1694 || bidi_it->next_en_pos > bidi_it->charpos)
1695 type = WEAK_EN;
1696 else /* W5: ET/BN with EN after it. */
1697 {
182ce2d2 1698 EMACS_INT en_pos = bidi_it->charpos + bidi_it->nchars;
9f257352
EZ
1699 const unsigned char *s =
1700 STRINGP (bidi_it->string.lstring)
1701 ? SDATA (bidi_it->string.lstring) : bidi_it->string.s;
bc5a45f3 1702
102ebb00
EZ
1703 if (bidi_it->nchars <= 0)
1704 abort ();
bc5a45f3 1705 next_char =
87e67904
EZ
1706 bidi_it->charpos + bidi_it->nchars >= eob
1707 ? BIDI_EOB
f3014ef5
EZ
1708 : bidi_char_at_pos (bidi_it->bytepos + bidi_it->ch_len, s,
1709 bidi_it->string.unibyte);
bc5a45f3
EZ
1710 type_of_next = bidi_get_type (next_char, override);
1711
1712 if (type_of_next == WEAK_ET
1713 || type_of_next == WEAK_BN
1714 || bidi_explicit_dir_char (next_char))
1715 {
1716 bidi_copy_it (&saved_it, bidi_it);
1717 while (bidi_resolve_explicit (bidi_it) == new_level
1718 && (bidi_it->type == WEAK_BN
1719 || bidi_it->type == WEAK_ET))
1720 ;
1721 type_of_next = bidi_it->type;
1722 en_pos = bidi_it->charpos;
1723 bidi_copy_it (bidi_it, &saved_it);
1724 }
1725 if (type_of_next == WEAK_EN)
b7b65b15 1726 {
bc5a45f3
EZ
1727 /* If the last strong character is AL, the EN we've
1728 found will become AN when we get to it (W2). */
1729 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1730 {
1731 type = WEAK_EN;
1732 /* Remember this EN position, to speed up processing
1733 of the next ETs. */
1734 bidi_it->next_en_pos = en_pos;
1735 }
1736 else if (type == WEAK_BN)
1737 type = NEUTRAL_ON; /* W6/Retaining */
b7b65b15 1738 }
b7b65b15
EZ
1739 }
1740 }
1741 }
1742
1743 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
89d3374a
EZ
1744 || (type == WEAK_BN
1745 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1746 || bidi_it->prev.type_after_w1 == WEAK_ES
1747 || bidi_it->prev.type_after_w1 == WEAK_ET)))
b7b65b15
EZ
1748 type = NEUTRAL_ON;
1749
1750 /* Store the type we've got so far, before we clobber it with strong
1751 types in W7 and while resolving neutral types. But leave alone
1752 the original types that were recorded above, because we will need
1753 them for the L1 clause. */
89d3374a
EZ
1754 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1755 bidi_it->type_after_w1 = type;
1756 bidi_check_type (bidi_it->type_after_w1);
b7b65b15
EZ
1757
1758 if (type == WEAK_EN) /* W7 */
1759 {
89d3374a 1760 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
b7b65b15
EZ
1761 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1762 type = STRONG_L;
1763 }
1764
1765 bidi_it->type = type;
2d6e4628 1766 bidi_check_type (bidi_it->type);
b7b65b15
EZ
1767 return type;
1768}
1769
cbb09f04
EZ
1770/* Resolve the type of a neutral character according to the type of
1771 surrounding strong text and the current embedding level. */
58b9f433 1772static inline bidi_type_t
cbb09f04
EZ
1773bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
1774{
1775 /* N1: European and Arabic numbers are treated as though they were R. */
1776 if (next_type == WEAK_EN || next_type == WEAK_AN)
1777 next_type = STRONG_R;
1778 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
1779 prev_type = STRONG_R;
1780
1781 if (next_type == prev_type) /* N1 */
1782 return next_type;
1783 else if ((lev & 1) == 0) /* N2 */
1784 return STRONG_L;
1785 else
1786 return STRONG_R;
1787}
1788
fd3998ff 1789static bidi_type_t
b7b65b15
EZ
1790bidi_resolve_neutral (struct bidi_it *bidi_it)
1791{
1792 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1793 bidi_type_t type = bidi_resolve_weak (bidi_it);
1794 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1795
1796 if (!(type == STRONG_R
1797 || type == STRONG_L
1798 || type == WEAK_BN
1799 || type == WEAK_EN
1800 || type == WEAK_AN
1801 || type == NEUTRAL_B
1802 || type == NEUTRAL_S
1803 || type == NEUTRAL_WS
1804 || type == NEUTRAL_ON))
1805 abort ();
1806
1807 if (bidi_get_category (type) == NEUTRAL
1808 || (type == WEAK_BN && prev_level == current_level))
1809 {
1810 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1811 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1812 bidi_it->next_for_neutral.type,
1813 current_level);
1814 else
1815 {
1816 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1817 the assumption of batch-style processing; see clauses W4,
1818 W5, and especially N1, which require to look far forward
182ce2d2
EZ
1819 (as well as back) in the buffer/string. May the fleas of
1820 a thousand camels infest the armpits of those who design
b7b65b15
EZ
1821 supposedly general-purpose algorithms by looking at their
1822 own implementations, and fail to consider other possible
1823 implementations! */
1824 struct bidi_it saved_it;
1825 bidi_type_t next_type;
1826
1827 if (bidi_it->scan_dir == -1)
1828 abort ();
1829
1830 bidi_copy_it (&saved_it, bidi_it);
1831 /* Scan the text forward until we find the first non-neutral
1832 character, and then use that to resolve the neutral we
1833 are dealing with now. We also cache the scanned iterator
1834 states, to salvage some of the effort later. */
1835 bidi_cache_iterator_state (bidi_it, 0);
1836 do {
1837 /* Record the info about the previous character, so that
1838 it will be cached below with this state. */
89d3374a 1839 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1840 && bidi_it->type != WEAK_BN)
1841 bidi_remember_char (&bidi_it->prev, bidi_it);
1842 type = bidi_resolve_weak (bidi_it);
1843 /* Paragraph separators have their levels fully resolved
1844 at this point, so cache them as resolved. */
1845 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1846 /* FIXME: implement L1 here, by testing for a newline and
1847 resetting the level for any sequence of whitespace
1848 characters adjacent to it. */
1849 } while (!(type == NEUTRAL_B
1850 || (type != WEAK_BN
1851 && bidi_get_category (type) != NEUTRAL)
1852 /* This is all per level run, so stop when we
1853 reach the end of this level run. */
1854 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1855 current_level));
1856
1857 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1858
1859 switch (type)
1860 {
1861 case STRONG_L:
1862 case STRONG_R:
1863 case STRONG_AL:
1864 next_type = type;
1865 break;
1866 case WEAK_EN:
1867 case WEAK_AN:
1868 /* N1: ``European and Arabic numbers are treated as
1869 though they were R.'' */
1870 next_type = STRONG_R;
1871 saved_it.next_for_neutral.type = STRONG_R;
1872 break;
1873 case WEAK_BN:
1874 if (!bidi_explicit_dir_char (bidi_it->ch))
1875 abort (); /* can't happen: BNs are skipped */
1876 /* FALLTHROUGH */
1877 case NEUTRAL_B:
1878 /* Marched all the way to the end of this level run.
1879 We need to use the eor type, whose information is
1880 stored by bidi_set_sor_type in the prev_for_neutral
1881 member. */
1882 if (saved_it.type != WEAK_BN
89d3374a 1883 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
b7b65b15
EZ
1884 {
1885 next_type = bidi_it->prev_for_neutral.type;
1886 saved_it.next_for_neutral.type = next_type;
2d6e4628 1887 bidi_check_type (next_type);
b7b65b15
EZ
1888 }
1889 else
1890 {
1891 /* This is a BN which does not adjoin neutrals.
1892 Leave its type alone. */
1893 bidi_copy_it (bidi_it, &saved_it);
1894 return bidi_it->type;
1895 }
1896 break;
1897 default:
1898 abort ();
1899 }
1900 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1901 next_type, current_level);
1902 saved_it.type = type;
2d6e4628 1903 bidi_check_type (type);
b7b65b15
EZ
1904 bidi_copy_it (bidi_it, &saved_it);
1905 }
1906 }
1907 return type;
1908}
1909
1910/* Given an iterator state in BIDI_IT, advance one character position
182ce2d2
EZ
1911 in the buffer/string to the next character (in the logical order),
1912 resolve the bidi type of that next character, and return that
1913 type. */
fd3998ff 1914static bidi_type_t
b7b65b15
EZ
1915bidi_type_of_next_char (struct bidi_it *bidi_it)
1916{
1917 bidi_type_t type;
1918
1919 /* This should always be called during a forward scan. */
1920 if (bidi_it->scan_dir != 1)
1921 abort ();
1922
1923 /* Reset the limit until which to ignore BNs if we step out of the
1924 area where we found only empty levels. */
87e67904 1925 if ((bidi_it->ignore_bn_limit > -1
b7b65b15 1926 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
87e67904 1927 || (bidi_it->ignore_bn_limit == -2
b7b65b15 1928 && !bidi_explicit_dir_char (bidi_it->ch)))
87e67904 1929 bidi_it->ignore_bn_limit = -1;
b7b65b15
EZ
1930
1931 type = bidi_resolve_neutral (bidi_it);
1932
1933 return type;
1934}
1935
1936/* Given an iterator state BIDI_IT, advance one character position in
182ce2d2
EZ
1937 the buffer/string to the next character (in the current scan
1938 direction), resolve the embedding and implicit levels of that next
1939 character, and return the resulting level. */
fd3998ff 1940static int
b7b65b15
EZ
1941bidi_level_of_next_char (struct bidi_it *bidi_it)
1942{
1943 bidi_type_t type;
1944 int level, prev_level = -1;
1945 struct bidi_saved_info next_for_neutral;
578b494e 1946 EMACS_INT next_char_pos = -2;
b7b65b15
EZ
1947
1948 if (bidi_it->scan_dir == 1)
1949 {
9f257352
EZ
1950 EMACS_INT eob =
1951 (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
1952 ? bidi_it->string.schars : ZV;
1953
b7b65b15 1954 /* There's no sense in trying to advance if we hit end of text. */
9f257352 1955 if (bidi_it->charpos >= eob)
b7b65b15
EZ
1956 return bidi_it->resolved_level;
1957
1958 /* Record the info about the previous character. */
89d3374a 1959 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
b7b65b15
EZ
1960 && bidi_it->type != WEAK_BN)
1961 bidi_remember_char (&bidi_it->prev, bidi_it);
89d3374a
EZ
1962 if (bidi_it->type_after_w1 == STRONG_R
1963 || bidi_it->type_after_w1 == STRONG_L
1964 || bidi_it->type_after_w1 == STRONG_AL)
b7b65b15
EZ
1965 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1966 /* FIXME: it sounds like we don't need both prev and
1967 prev_for_neutral members, but I'm leaving them both for now. */
1968 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1969 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1970 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1971
1972 /* If we overstepped the characters used for resolving neutrals
1973 and whitespace, invalidate their info in the iterator. */
1974 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1975 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1976 if (bidi_it->next_en_pos >= 0
1977 && bidi_it->charpos >= bidi_it->next_en_pos)
1978 bidi_it->next_en_pos = -1;
1979 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1980 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1981 bidi_it->next_for_ws.type = UNKNOWN_BT;
1982
1983 /* This must be taken before we fill the iterator with the info
1984 about the next char. If we scan backwards, the iterator
1985 state must be already cached, so there's no need to know the
1986 embedding level of the previous character, since we will be
1987 returning to our caller shortly. */
1988 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1989 }
1990 next_for_neutral = bidi_it->next_for_neutral;
1991
182ce2d2
EZ
1992 /* Perhaps the character we want is already cached. If it is, the
1993 call to bidi_cache_find below will return a type other than
1994 UNKNOWN_BT. */
87e67904 1995 if (bidi_cache_idx > bidi_cache_start && !bidi_it->first_elt)
102ebb00 1996 {
9f257352
EZ
1997 int bob =
1998 (bidi_it->string.s || STRINGP (bidi_it->string.lstring)) ? 0 : 1;
1999
102ebb00
EZ
2000 if (bidi_it->scan_dir > 0)
2001 {
2002 if (bidi_it->nchars <= 0)
2003 abort ();
2004 next_char_pos = bidi_it->charpos + bidi_it->nchars;
2005 }
9f257352 2006 else if (bidi_it->charpos >= bob)
bb269206
EZ
2007 /* Implementation note: we allow next_char_pos to be as low as
2008 0 for buffers or -1 for strings, and that is okay because
2009 that's the "position" of the sentinel iterator state we
2010 cached at the beginning of the iteration. */
102ebb00 2011 next_char_pos = bidi_it->charpos - 1;
578b494e 2012 if (next_char_pos >= bob - 1)
87e67904
EZ
2013 type = bidi_cache_find (next_char_pos, -1, bidi_it);
2014 else
2015 type = UNKNOWN_BT;
102ebb00 2016 }
182ce2d2 2017 else
102ebb00 2018 type = UNKNOWN_BT;
b7b65b15
EZ
2019 if (type != UNKNOWN_BT)
2020 {
2021 /* Don't lose the information for resolving neutrals! The
2022 cached states could have been cached before their
2023 next_for_neutral member was computed. If we are on our way
2024 forward, we can simply take the info from the previous
2025 state. */
2026 if (bidi_it->scan_dir == 1
2027 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
2028 bidi_it->next_for_neutral = next_for_neutral;
2029
2030 /* If resolved_level is -1, it means this state was cached
2031 before it was completely resolved, so we cannot return
2032 it. */
2033 if (bidi_it->resolved_level != -1)
2034 return bidi_it->resolved_level;
2035 }
2036 if (bidi_it->scan_dir == -1)
2037 /* If we are going backwards, the iterator state is already cached
2038 from previous scans, and should be fully resolved. */
2039 abort ();
2040
2041 if (type == UNKNOWN_BT)
2042 type = bidi_type_of_next_char (bidi_it);
2043
2044 if (type == NEUTRAL_B)
2045 return bidi_it->resolved_level;
2046
2047 level = bidi_it->level_stack[bidi_it->stack_idx].level;
2048 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
2049 || (type == WEAK_BN && prev_level == level))
2050 {
2051 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
2052 abort ();
2053
2054 /* If the cached state shows a neutral character, it was not
2055 resolved by bidi_resolve_neutral, so do it now. */
2056 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
2057 bidi_it->next_for_neutral.type,
2058 level);
2059 }
2060
2061 if (!(type == STRONG_R
2062 || type == STRONG_L
2063 || type == WEAK_BN
2064 || type == WEAK_EN
2065 || type == WEAK_AN))
2066 abort ();
2067 bidi_it->type = type;
2d6e4628 2068 bidi_check_type (bidi_it->type);
b7b65b15
EZ
2069
2070 /* For L1 below, we need to know, for each WS character, whether
0d327994 2071 it belongs to a sequence of WS characters preceding a newline
b7b65b15 2072 or a TAB or a paragraph separator. */
89d3374a 2073 if (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
2074 && bidi_it->next_for_ws.type == UNKNOWN_BT)
2075 {
2076 int ch;
8264569d 2077 EMACS_INT clen = bidi_it->ch_len;
6bff6497
EZ
2078 EMACS_INT bpos = bidi_it->bytepos;
2079 EMACS_INT cpos = bidi_it->charpos;
182ce2d2 2080 EMACS_INT disp_pos = bidi_it->disp_pos;
102ebb00 2081 EMACS_INT nc = bidi_it->nchars;
87e67904 2082 struct bidi_string_data bs = bidi_it->string;
b7b65b15 2083 bidi_type_t chtype;
fc6f18ce 2084 int fwp = bidi_it->frame_window_p;
55439c61 2085 int dpp = bidi_it->disp_prop_p;
b7b65b15 2086
102ebb00
EZ
2087 if (bidi_it->nchars <= 0)
2088 abort ();
b7b65b15 2089 do {
55439c61
EZ
2090 ch = bidi_fetch_char (bpos += clen, cpos += nc, &disp_pos, &dpp, &bs,
2091 fwp, &clen, &nc);
e7402cb2 2092 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
b7b65b15
EZ
2093 chtype = NEUTRAL_B;
2094 else
6bff6497 2095 chtype = bidi_get_type (ch, NEUTRAL_DIR);
b7b65b15
EZ
2096 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
2097 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
2098 bidi_it->next_for_ws.type = chtype;
2d6e4628 2099 bidi_check_type (bidi_it->next_for_ws.type);
b7b65b15
EZ
2100 bidi_it->next_for_ws.charpos = cpos;
2101 bidi_it->next_for_ws.bytepos = bpos;
2102 }
2103
2104 /* Resolve implicit levels, with a twist: PDFs get the embedding
2105 level of the enbedding they terminate. See below for the
2106 reason. */
89d3374a 2107 if (bidi_it->orig_type == PDF
b7b65b15
EZ
2108 /* Don't do this if this formatting code didn't change the
2109 embedding level due to invalid or empty embeddings. */
2110 && prev_level != level)
2111 {
2112 /* Don't look in UAX#9 for the reason for this: it's our own
2113 private quirk. The reason is that we want the formatting
2114 codes to be delivered so that they bracket the text of their
2115 embedding. For example, given the text
2116
2117 {RLO}teST{PDF}
2118
2119 we want it to be displayed as
2120
0d68907d 2121 {PDF}STet{RLO}
b7b65b15
EZ
2122
2123 not as
2124
2125 STet{RLO}{PDF}
2126
2127 which will result because we bump up the embedding level as
2128 soon as we see the RLO and pop it as soon as we see the PDF,
2129 so RLO itself has the same embedding level as "teST", and
2130 thus would be normally delivered last, just before the PDF.
2131 The switch below fiddles with the level of PDF so that this
2132 ugly side effect does not happen.
2133
2134 (This is, of course, only important if the formatting codes
e7402cb2
EZ
2135 are actually displayed, but Emacs does need to display them
2136 if the user wants to.) */
b7b65b15
EZ
2137 level = prev_level;
2138 }
89d3374a
EZ
2139 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
2140 || bidi_it->orig_type == NEUTRAL_S
e7402cb2
EZ
2141 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
2142 /* || bidi_it->ch == LINESEP_CHAR */
89d3374a 2143 || (bidi_it->orig_type == NEUTRAL_WS
b7b65b15
EZ
2144 && (bidi_it->next_for_ws.type == NEUTRAL_B
2145 || bidi_it->next_for_ws.type == NEUTRAL_S)))
2146 level = bidi_it->level_stack[0].level;
2147 else if ((level & 1) == 0) /* I1 */
2148 {
2149 if (type == STRONG_R)
2150 level++;
2151 else if (type == WEAK_EN || type == WEAK_AN)
2152 level += 2;
2153 }
2154 else /* I2 */
2155 {
2156 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
2157 level++;
2158 }
2159
2160 bidi_it->resolved_level = level;
2161 return level;
2162}
2163
2164/* Move to the other edge of a level given by LEVEL. If END_FLAG is
2165 non-zero, we are at the end of a level, and we need to prepare to
2166 resume the scan of the lower level.
2167
2168 If this level's other edge is cached, we simply jump to it, filling
2169 the iterator structure with the iterator state on the other edge.
182ce2d2
EZ
2170 Otherwise, we walk the buffer or string until we come back to the
2171 same level as LEVEL.
b7b65b15
EZ
2172
2173 Note: we are not talking here about a ``level run'' in the UAX#9
2174 sense of the term, but rather about a ``level'' which includes
2175 all the levels higher than it. In other words, given the levels
2176 like this:
2177
2178 11111112222222333333334443343222222111111112223322111
2179 A B C
2180
2181 and assuming we are at point A scanning left to right, this
2182 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
2183 at point B. */
2184static void
2185bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
2186{
2187 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
39e378da 2188 ptrdiff_t idx;
b7b65b15
EZ
2189
2190 /* Try the cache first. */
87e67904
EZ
2191 if ((idx = bidi_cache_find_level_change (level, dir, end_flag))
2192 >= bidi_cache_start)
b7b65b15
EZ
2193 bidi_cache_fetch_state (idx, bidi_it);
2194 else
2195 {
2196 int new_level;
2197
2198 if (end_flag)
2199 abort (); /* if we are at end of level, its edges must be cached */
2200
2201 bidi_cache_iterator_state (bidi_it, 1);
2202 do {
2203 new_level = bidi_level_of_next_char (bidi_it);
2204 bidi_cache_iterator_state (bidi_it, 1);
2205 } while (new_level >= level);
2206 }
2207}
2208
2209void
4b292a22 2210bidi_move_to_visually_next (struct bidi_it *bidi_it)
b7b65b15
EZ
2211{
2212 int old_level, new_level, next_level;
9c82e145 2213 struct bidi_it sentinel;
acb28818 2214 struct gcpro gcpro1;
b7b65b15 2215
87e67904
EZ
2216 if (bidi_it->charpos < 0 || bidi_it->bytepos < 0)
2217 abort ();
b7b65b15
EZ
2218
2219 if (bidi_it->scan_dir == 0)
2220 {
2221 bidi_it->scan_dir = 1; /* default to logical order */
2222 }
2223
acb28818 2224 /* The code below can call eval, and thus cause GC. If we are
7e2ad32c 2225 iterating a Lisp string, make sure it won't be GCed. */
acb28818
EZ
2226 if (STRINGP (bidi_it->string.lstring))
2227 GCPRO1 (bidi_it->string.lstring);
2228
be39f003
EZ
2229 /* If we just passed a newline, initialize for the next line. */
2230 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
2231 bidi_line_init (bidi_it);
2232
6dcfd253
EZ
2233 /* Prepare the sentinel iterator state, and cache it. When we bump
2234 into it, scanning backwards, we'll know that the last non-base
2235 level is exhausted. */
87e67904 2236 if (bidi_cache_idx == bidi_cache_start)
9c82e145
EZ
2237 {
2238 bidi_copy_it (&sentinel, bidi_it);
2239 if (bidi_it->first_elt)
2240 {
2241 sentinel.charpos--; /* cached charpos needs to be monotonic */
2242 sentinel.bytepos--;
2243 sentinel.ch = '\n'; /* doesn't matter, but why not? */
2244 sentinel.ch_len = 1;
182ce2d2 2245 sentinel.nchars = 1;
9c82e145 2246 }
6dcfd253 2247 bidi_cache_iterator_state (&sentinel, 1);
9c82e145 2248 }
b7b65b15
EZ
2249
2250 old_level = bidi_it->resolved_level;
2251 new_level = bidi_level_of_next_char (bidi_it);
b7b65b15
EZ
2252
2253 /* Reordering of resolved levels (clause L2) is implemented by
2254 jumping to the other edge of the level and flipping direction of
c0546589 2255 scanning the text whenever we find a level change. */
b7b65b15
EZ
2256 if (new_level != old_level)
2257 {
2258 int ascending = new_level > old_level;
2259 int level_to_search = ascending ? old_level + 1 : old_level;
2260 int incr = ascending ? 1 : -1;
2261 int expected_next_level = old_level + incr;
2262
b7b65b15
EZ
2263 /* Jump (or walk) to the other edge of this level. */
2264 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2265 /* Switch scan direction and peek at the next character in the
2266 new direction. */
2267 bidi_it->scan_dir = -bidi_it->scan_dir;
2268
2269 /* The following loop handles the case where the resolved level
2270 jumps by more than one. This is typical for numbers inside a
2271 run of text with left-to-right embedding direction, but can
2272 also happen in other situations. In those cases the decision
2273 where to continue after a level change, and in what direction,
2274 is tricky. For example, given a text like below:
2275
2276 abcdefgh
2277 11336622
2278
2279 (where the numbers below the text show the resolved levels),
2280 the result of reordering according to UAX#9 should be this:
2281
2282 efdcghba
2283
2284 This is implemented by the loop below which flips direction
2285 and jumps to the other edge of the level each time it finds
2286 the new level not to be the expected one. The expected level
2287 is always one more or one less than the previous one. */
2288 next_level = bidi_peek_at_next_level (bidi_it);
2289 while (next_level != expected_next_level)
2290 {
2291 expected_next_level += incr;
2292 level_to_search += incr;
2293 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
2294 bidi_it->scan_dir = -bidi_it->scan_dir;
2295 next_level = bidi_peek_at_next_level (bidi_it);
2296 }
2297
2298 /* Finally, deliver the next character in the new direction. */
2299 next_level = bidi_level_of_next_char (bidi_it);
2300 }
2301
b44d9321
EZ
2302 /* Take note when we have just processed the newline that precedes
2303 the end of the paragraph. The next time we are about to be
2304 called, set_iterator_to_next will automatically reinit the
2305 paragraph direction, if needed. We do this at the newline before
2306 the paragraph separator, because the next character might not be
2307 the first character of the next paragraph, due to the bidi
c0546589
EZ
2308 reordering, whereas we _must_ know the paragraph base direction
2309 _before_ we process the paragraph's text, since the base
2310 direction affects the reordering. */
87e67904 2311 if (bidi_it->scan_dir == 1 && bidi_it->orig_type == NEUTRAL_B)
be39f003 2312 {
87e67904
EZ
2313 /* The paragraph direction of the entire string, once
2314 determined, is in effect for the entire string. Setting the
2315 separator limit to the end of the string prevents
2316 bidi_paragraph_init from being called automatically on this
2317 string. */
9f257352 2318 if (bidi_it->string.s || STRINGP (bidi_it->string.lstring))
87e67904
EZ
2319 bidi_it->separator_limit = bidi_it->string.schars;
2320 else if (bidi_it->bytepos < ZV_BYTE)
be39f003 2321 {
87e67904
EZ
2322 EMACS_INT sep_len =
2323 bidi_at_paragraph_end (bidi_it->charpos + bidi_it->nchars,
2324 bidi_it->bytepos + bidi_it->ch_len);
2325 if (bidi_it->nchars <= 0)
2326 abort ();
2327 if (sep_len >= 0)
2328 {
2329 bidi_it->new_paragraph = 1;
2330 /* Record the buffer position of the last character of the
2331 paragraph separator. */
2332 bidi_it->separator_limit =
2333 bidi_it->charpos + bidi_it->nchars + sep_len;
2334 }
be39f003
EZ
2335 }
2336 }
6bff6497 2337
87e67904 2338 if (bidi_it->scan_dir == 1 && bidi_cache_idx > bidi_cache_start)
b7b65b15
EZ
2339 {
2340 /* If we are at paragraph's base embedding level and beyond the
2341 last cached position, the cache's job is done and we can
2342 discard it. */
2343 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
182ce2d2
EZ
2344 && bidi_it->charpos > (bidi_cache[bidi_cache_idx - 1].charpos
2345 + bidi_cache[bidi_cache_idx - 1].nchars - 1))
b7b65b15
EZ
2346 bidi_cache_reset ();
2347 /* But as long as we are caching during forward scan, we must
2348 cache each state, or else the cache integrity will be
2349 compromised: it assumes cached states correspond to buffer
2350 positions 1:1. */
2351 else
2352 bidi_cache_iterator_state (bidi_it, 1);
2353 }
acb28818
EZ
2354
2355 if (STRINGP (bidi_it->string.lstring))
2356 UNGCPRO;
b7b65b15
EZ
2357}
2358
2359/* This is meant to be called from within the debugger, whenever you
2360 wish to examine the cache contents. */
e78aecca 2361void bidi_dump_cached_states (void) EXTERNALLY_VISIBLE;
b7b65b15
EZ
2362void
2363bidi_dump_cached_states (void)
2364{
39e378da 2365 ptrdiff_t i;
b7b65b15
EZ
2366 int ndigits = 1;
2367
2368 if (bidi_cache_idx == 0)
2369 {
2370 fprintf (stderr, "The cache is empty.\n");
2371 return;
2372 }
8265d3bb 2373 fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
b7b65b15
EZ
2374 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
2375
2376 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
2377 ndigits++;
2378 fputs ("ch ", stderr);
2379 for (i = 0; i < bidi_cache_idx; i++)
2380 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
2381 fputs ("\n", stderr);
2382 fputs ("lvl ", stderr);
2383 for (i = 0; i < bidi_cache_idx; i++)
2384 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
2385 fputs ("\n", stderr);
2386 fputs ("pos ", stderr);
2387 for (i = 0; i < bidi_cache_idx; i++)
c2982e87 2388 fprintf (stderr, "%*"pI"d", ndigits, bidi_cache[i].charpos);
b7b65b15
EZ
2389 fputs ("\n", stderr);
2390}