Retrospective commit from 2009-09-26.
[bpt/emacs.git] / src / bidi.c
1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
2 Copyright (C) 2000, 2001, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 /* Written by Eli Zaretskii <eliz@gnu.org>.
23
24 A sequential implementation of the Unicode Bidirectional algorithm,
25 as per UAX#9, a part of the Unicode Standard.
26
27 Unlike the reference and most other implementations, this one is
28 designed to be called once for every character in the buffer.
29
30 The main entry point is bidi_get_next_char_visually. Each time it
31 is called, it finds the next character in the visual order, and
32 returns its information in a special structure. The caller is then
33 expected to process this character for display or any other
34 purposes, and call bidi_get_next_char_visually for the next
35 character. See the comments in bidi_get_next_char_visually for
36 more details about its algorithm that finds the next visual-order
37 character by resolving their levels on the fly.
38
39 If you want to understand the code, you will have to read it
40 together with the relevant portions of UAX#9. The comments include
41 references to UAX#9 rules, for that very reason.
42
43 A note about references to UAX#9 rules: if the reference says
44 something like "X9/Retaining", it means that you need to refer to
45 rule X9 and to its modifications decribed in the "Implementation
46 Notes" section of UAX#9, under "Retaining Format Codes". */
47
48 #ifdef HAVE_CONFIG_H
49 #include <config.h>
50 #endif
51
52 #include <stdio.h>
53
54 #ifdef HAVE_STRING_H
55 #include <string.h>
56 #endif
57
58 #include "lisp.h"
59 #include "buffer.h"
60 #include "character.h"
61 #include "dispextern.h"
62
63 static int bidi_initialized = 0;
64
65 static Lisp_Object bidi_type_table;
66
67 /* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
68 #define LRM_CHAR 0x200E
69 #define RLM_CHAR 0x200F
70 #define LRE_CHAR 0x202A
71 #define RLE_CHAR 0x202B
72 #define PDF_CHAR 0x202C
73 #define LRO_CHAR 0x202D
74 #define RLO_CHAR 0x202E
75
76 #define BIDI_EOB -1
77 #define BIDI_BOB -2 /* FIXME: Is this needed? */
78
79 /* Local data structures. (Look in dispextern.h for the rest.) */
80
81 /* What we need to know about the current paragraph. */
82 struct bidi_paragraph_info {
83 int start_bytepos; /* byte position where it begins */
84 int end_bytepos; /* byte position where it ends */
85 int embedding_level; /* its basic embedding level */
86 bidi_dir_t base_dir; /* its base direction */
87 };
88
89 /* Data type for describing the bidirectional character categories. */
90 typedef enum {
91 UNKNOWN_BC,
92 NEUTRAL,
93 WEAK,
94 STRONG
95 } bidi_category_t;
96
97 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
98
99 /* FIXME: Should be user-definable. */
100 bidi_dir_t bidi_overriding_paragraph_direction = L2R;
101
102 /* FIXME: Unused? */
103 #define ASCII_BIDI_TYPE_SET(STR, TYPE) \
104 do { \
105 unsigned char *p; \
106 for (p = (STR); *p; p++) \
107 CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \
108 } while (0)
109
110 static void
111 bidi_initialize ()
112 {
113 /* FIXME: This should come from the Unicode Database. */
114 struct {
115 int from, to;
116 bidi_type_t type;
117 } bidi_type[] =
118 { { 0x0000, 0x0008, WEAK_BN },
119 { 0x0009, 0x0000, NEUTRAL_S },
120 { 0x000A, 0x0000, NEUTRAL_B },
121 { 0x000B, 0x0000, NEUTRAL_S },
122 { 0x000C, 0x0000, NEUTRAL_WS },
123 { 0x000D, 0x0000, NEUTRAL_B },
124 { 0x000E, 0x001B, WEAK_BN },
125 { 0x001C, 0x001E, NEUTRAL_B },
126 { 0x001F, 0x0000, NEUTRAL_S },
127 { 0x0020, 0x0000, NEUTRAL_WS },
128 { 0x0021, 0x0022, NEUTRAL_ON },
129 { 0x0023, 0x0025, WEAK_ET },
130 { 0x0026, 0x002A, NEUTRAL_ON },
131 { 0x002B, 0x0000, WEAK_ET },
132 { 0x002C, 0x0000, WEAK_CS },
133 { 0x002D, 0x0000, WEAK_ET },
134 { 0x002E, 0x0000, WEAK_CS },
135 { 0x002F, 0x0000, WEAK_ES },
136 { 0x0030, 0x0039, WEAK_EN },
137 { 0x003A, 0x0000, WEAK_CS },
138 { 0x003B, 0x0040, NEUTRAL_ON },
139 { 0x005B, 0x0060, NEUTRAL_ON },
140 { 0x007B, 0x007E, NEUTRAL_ON },
141 { 0x007F, 0x0084, WEAK_BN },
142 { 0x0085, 0x0000, NEUTRAL_B },
143 { 0x0086, 0x009F, WEAK_BN },
144 { 0x00A0, 0x0000, WEAK_CS },
145 { 0x00A1, 0x0000, NEUTRAL_ON },
146 { 0x00A2, 0x00A5, WEAK_ET },
147 { 0x00A6, 0x00A9, NEUTRAL_ON },
148 { 0x00AB, 0x00AF, NEUTRAL_ON },
149 { 0x00B0, 0x00B1, WEAK_ET },
150 { 0x00B2, 0x00B3, WEAK_EN },
151 { 0x00B4, 0x0000, NEUTRAL_ON },
152 { 0x00B6, 0x00B8, NEUTRAL_ON },
153 { 0x00B9, 0x0000, WEAK_EN },
154 { 0x00BB, 0x00BF, NEUTRAL_ON },
155 { 0x00D7, 0x0000, NEUTRAL_ON },
156 { 0x00F7, 0x0000, NEUTRAL_ON },
157 { 0x02B9, 0x02BA, NEUTRAL_ON },
158 { 0x02C2, 0x02CF, NEUTRAL_ON },
159 { 0x02D2, 0x02DF, NEUTRAL_ON },
160 { 0x02E5, 0x02ED, NEUTRAL_ON },
161 { 0x0300, 0x036F, WEAK_NSM },
162 { 0x0374, 0x0375, NEUTRAL_ON },
163 { 0x037E, 0x0385, NEUTRAL_ON },
164 { 0x0387, 0x0000, NEUTRAL_ON },
165 { 0x03F6, 0x0000, NEUTRAL_ON },
166 { 0x0483, 0x0489, WEAK_NSM },
167 { 0x058A, 0x0000, NEUTRAL_ON },
168 { 0x0591, 0x05BD, WEAK_NSM },
169 { 0x05BE, 0x0000, STRONG_R },
170 { 0x05BF, 0x0000, WEAK_NSM },
171 { 0x05C0, 0x0000, STRONG_R },
172 { 0x05C1, 0x05C2, WEAK_NSM },
173 { 0x05C3, 0x0000, STRONG_R },
174 { 0x05C4, 0x0000, WEAK_NSM },
175 { 0x05D0, 0x05F4, STRONG_R },
176 { 0x060C, 0x0000, WEAK_CS },
177 { 0x061B, 0x064A, STRONG_AL },
178 { 0x064B, 0x0655, WEAK_NSM },
179 { 0x0660, 0x0669, WEAK_AN },
180 { 0x066A, 0x0000, WEAK_ET },
181 { 0x066B, 0x066C, WEAK_AN },
182 { 0x066D, 0x066F, STRONG_AL },
183 { 0x0670, 0x0000, WEAK_NSM },
184 { 0x0671, 0x06D5, STRONG_AL },
185 { 0x06D6, 0x06DC, WEAK_NSM },
186 { 0x06DD, 0x0000, STRONG_AL },
187 { 0x06DE, 0x06E4, WEAK_NSM },
188 { 0x06E5, 0x06E6, STRONG_AL },
189 { 0x06E7, 0x06E8, WEAK_NSM },
190 { 0x06E9, 0x0000, NEUTRAL_ON },
191 { 0x06EA, 0x06ED, WEAK_NSM },
192 { 0x06F0, 0x06F9, WEAK_EN },
193 { 0x06FA, 0x070D, STRONG_AL },
194 { 0x070F, 0x0000, WEAK_BN },
195 { 0x0710, 0x0000, STRONG_AL },
196 { 0x0711, 0x0000, WEAK_NSM },
197 { 0x0712, 0x072C, STRONG_AL },
198 { 0x0730, 0x074A, WEAK_NSM },
199 { 0x0780, 0x07A5, STRONG_AL },
200 { 0x07A6, 0x07B0, WEAK_NSM },
201 { 0x07B1, 0x0000, STRONG_AL },
202 { 0x0901, 0x0902, WEAK_NSM },
203 { 0x093C, 0x0000, WEAK_NSM },
204 { 0x0941, 0x0948, WEAK_NSM },
205 { 0x094D, 0x0000, WEAK_NSM },
206 { 0x0951, 0x0954, WEAK_NSM },
207 { 0x0962, 0x0963, WEAK_NSM },
208 { 0x0981, 0x0000, WEAK_NSM },
209 { 0x09BC, 0x0000, WEAK_NSM },
210 { 0x09C1, 0x09C4, WEAK_NSM },
211 { 0x09CD, 0x0000, WEAK_NSM },
212 { 0x09E2, 0x09E3, WEAK_NSM },
213 { 0x09F2, 0x09F3, WEAK_ET },
214 { 0x0A02, 0x0000, WEAK_NSM },
215 { 0x0A3C, 0x0000, WEAK_NSM },
216 { 0x0A41, 0x0A4D, WEAK_NSM },
217 { 0x0A70, 0x0A71, WEAK_NSM },
218 { 0x0A81, 0x0A82, WEAK_NSM },
219 { 0x0ABC, 0x0000, WEAK_NSM },
220 { 0x0AC1, 0x0AC8, WEAK_NSM },
221 { 0x0ACD, 0x0000, WEAK_NSM },
222 { 0x0B01, 0x0000, WEAK_NSM },
223 { 0x0B3C, 0x0000, WEAK_NSM },
224 { 0x0B3F, 0x0000, WEAK_NSM },
225 { 0x0B41, 0x0B43, WEAK_NSM },
226 { 0x0B4D, 0x0B56, WEAK_NSM },
227 { 0x0B82, 0x0000, WEAK_NSM },
228 { 0x0BC0, 0x0000, WEAK_NSM },
229 { 0x0BCD, 0x0000, WEAK_NSM },
230 { 0x0C3E, 0x0C40, WEAK_NSM },
231 { 0x0C46, 0x0C56, WEAK_NSM },
232 { 0x0CBF, 0x0000, WEAK_NSM },
233 { 0x0CC6, 0x0000, WEAK_NSM },
234 { 0x0CCC, 0x0CCD, WEAK_NSM },
235 { 0x0D41, 0x0D43, WEAK_NSM },
236 { 0x0D4D, 0x0000, WEAK_NSM },
237 { 0x0DCA, 0x0000, WEAK_NSM },
238 { 0x0DD2, 0x0DD6, WEAK_NSM },
239 { 0x0E31, 0x0000, WEAK_NSM },
240 { 0x0E34, 0x0E3A, WEAK_NSM },
241 { 0x0E3F, 0x0000, WEAK_ET },
242 { 0x0E47, 0x0E4E, WEAK_NSM },
243 { 0x0EB1, 0x0000, WEAK_NSM },
244 { 0x0EB4, 0x0EBC, WEAK_NSM },
245 { 0x0EC8, 0x0ECD, WEAK_NSM },
246 { 0x0F18, 0x0F19, WEAK_NSM },
247 { 0x0F35, 0x0000, WEAK_NSM },
248 { 0x0F37, 0x0000, WEAK_NSM },
249 { 0x0F39, 0x0000, WEAK_NSM },
250 { 0x0F3A, 0x0F3D, NEUTRAL_ON },
251 { 0x0F71, 0x0F7E, WEAK_NSM },
252 { 0x0F80, 0x0F84, WEAK_NSM },
253 { 0x0F86, 0x0F87, WEAK_NSM },
254 { 0x0F90, 0x0FBC, WEAK_NSM },
255 { 0x0FC6, 0x0000, WEAK_NSM },
256 { 0x102D, 0x1030, WEAK_NSM },
257 { 0x1032, 0x1037, WEAK_NSM },
258 { 0x1039, 0x0000, WEAK_NSM },
259 { 0x1058, 0x1059, WEAK_NSM },
260 { 0x1680, 0x0000, NEUTRAL_WS },
261 { 0x169B, 0x169C, NEUTRAL_ON },
262 { 0x1712, 0x1714, WEAK_NSM },
263 { 0x1732, 0x1734, WEAK_NSM },
264 { 0x1752, 0x1753, WEAK_NSM },
265 { 0x1772, 0x1773, WEAK_NSM },
266 { 0x17B7, 0x17BD, WEAK_NSM },
267 { 0x17C6, 0x0000, WEAK_NSM },
268 { 0x17C9, 0x17D3, WEAK_NSM },
269 { 0x17DB, 0x0000, WEAK_ET },
270 { 0x1800, 0x180A, NEUTRAL_ON },
271 { 0x180B, 0x180D, WEAK_NSM },
272 { 0x180E, 0x0000, WEAK_BN },
273 { 0x18A9, 0x0000, WEAK_NSM },
274 { 0x1FBD, 0x0000, NEUTRAL_ON },
275 { 0x1FBF, 0x1FC1, NEUTRAL_ON },
276 { 0x1FCD, 0x1FCF, NEUTRAL_ON },
277 { 0x1FDD, 0x1FDF, NEUTRAL_ON },
278 { 0x1FED, 0x1FEF, NEUTRAL_ON },
279 { 0x1FFD, 0x1FFE, NEUTRAL_ON },
280 { 0x2000, 0x200A, NEUTRAL_WS },
281 { 0x200B, 0x200D, WEAK_BN },
282 { 0x200F, 0x0000, STRONG_R },
283 { 0x2010, 0x2027, NEUTRAL_ON },
284 { 0x2028, 0x0000, NEUTRAL_WS },
285 { 0x2029, 0x0000, NEUTRAL_B },
286 { 0x202A, 0x0000, LRE },
287 { 0x202B, 0x0000, RLE },
288 { 0x202C, 0x0000, PDF },
289 { 0x202D, 0x0000, LRO },
290 { 0x202E, 0x0000, RLO },
291 { 0x202F, 0x0000, NEUTRAL_WS },
292 { 0x2030, 0x2034, WEAK_ET },
293 { 0x2035, 0x2057, NEUTRAL_ON },
294 { 0x205F, 0x0000, NEUTRAL_WS },
295 { 0x2060, 0x206F, WEAK_BN },
296 { 0x2070, 0x0000, WEAK_EN },
297 { 0x2074, 0x2079, WEAK_EN },
298 { 0x207A, 0x207B, WEAK_ET },
299 { 0x207C, 0x207E, NEUTRAL_ON },
300 { 0x2080, 0x2089, WEAK_EN },
301 { 0x208A, 0x208B, WEAK_ET },
302 { 0x208C, 0x208E, NEUTRAL_ON },
303 { 0x20A0, 0x20B1, WEAK_ET },
304 { 0x20D0, 0x20EA, WEAK_NSM },
305 { 0x2100, 0x2101, NEUTRAL_ON },
306 { 0x2103, 0x2106, NEUTRAL_ON },
307 { 0x2108, 0x2109, NEUTRAL_ON },
308 { 0x2114, 0x0000, NEUTRAL_ON },
309 { 0x2116, 0x2118, NEUTRAL_ON },
310 { 0x211E, 0x2123, NEUTRAL_ON },
311 { 0x2125, 0x0000, NEUTRAL_ON },
312 { 0x2127, 0x0000, NEUTRAL_ON },
313 { 0x2129, 0x0000, NEUTRAL_ON },
314 { 0x212E, 0x0000, WEAK_ET },
315 { 0x2132, 0x0000, NEUTRAL_ON },
316 { 0x213A, 0x0000, NEUTRAL_ON },
317 { 0x2140, 0x2144, NEUTRAL_ON },
318 { 0x214A, 0x215F, NEUTRAL_ON },
319 { 0x2190, 0x2211, NEUTRAL_ON },
320 { 0x2212, 0x2213, WEAK_ET },
321 { 0x2214, 0x2335, NEUTRAL_ON },
322 { 0x237B, 0x2394, NEUTRAL_ON },
323 { 0x2396, 0x244A, NEUTRAL_ON },
324 { 0x2460, 0x249B, WEAK_EN },
325 { 0x24EA, 0x0000, WEAK_EN },
326 { 0x24EB, 0x2FFB, NEUTRAL_ON },
327 { 0x3000, 0x0000, NEUTRAL_WS },
328 { 0x3001, 0x3004, NEUTRAL_ON },
329 { 0x3008, 0x3020, NEUTRAL_ON },
330 { 0x302A, 0x302F, WEAK_NSM },
331 { 0x3030, 0x0000, NEUTRAL_ON },
332 { 0x3036, 0x3037, NEUTRAL_ON },
333 { 0x303D, 0x303F, NEUTRAL_ON },
334 { 0x3099, 0x309A, WEAK_NSM },
335 { 0x309B, 0x309C, NEUTRAL_ON },
336 { 0x30A0, 0x0000, NEUTRAL_ON },
337 { 0x30FB, 0x0000, NEUTRAL_ON },
338 { 0x3251, 0x325F, NEUTRAL_ON },
339 { 0x32B1, 0x32BF, NEUTRAL_ON },
340 { 0xA490, 0xA4C6, NEUTRAL_ON },
341 { 0xFB1D, 0x0000, STRONG_R },
342 { 0xFB1E, 0x0000, WEAK_NSM },
343 { 0xFB1F, 0xFB28, STRONG_R },
344 { 0xFB29, 0x0000, WEAK_ET },
345 { 0xFB2A, 0xFB4F, STRONG_R },
346 { 0xFB50, 0xFD3D, STRONG_AL },
347 { 0xFD3E, 0xFD3F, NEUTRAL_ON },
348 { 0xFD50, 0xFDFC, STRONG_AL },
349 { 0xFE00, 0xFE23, WEAK_NSM },
350 { 0xFE30, 0xFE4F, NEUTRAL_ON },
351 { 0xFE50, 0x0000, WEAK_CS },
352 { 0xFE51, 0x0000, NEUTRAL_ON },
353 { 0xFE52, 0x0000, WEAK_CS },
354 { 0xFE54, 0x0000, NEUTRAL_ON },
355 { 0xFE55, 0x0000, WEAK_CS },
356 { 0xFE56, 0xFE5E, NEUTRAL_ON },
357 { 0xFE5F, 0x0000, WEAK_ET },
358 { 0xFE60, 0xFE61, NEUTRAL_ON },
359 { 0xFE62, 0xFE63, WEAK_ET },
360 { 0xFE64, 0xFE68, NEUTRAL_ON },
361 { 0xFE69, 0xFE6A, WEAK_ET },
362 { 0xFE6B, 0x0000, NEUTRAL_ON },
363 { 0xFE70, 0xFEFC, STRONG_AL },
364 { 0xFEFF, 0x0000, WEAK_BN },
365 { 0xFF01, 0xFF02, NEUTRAL_ON },
366 { 0xFF03, 0xFF05, WEAK_ET },
367 { 0xFF06, 0xFF0A, NEUTRAL_ON },
368 { 0xFF0B, 0x0000, WEAK_ET },
369 { 0xFF0C, 0x0000, WEAK_CS },
370 { 0xFF0D, 0x0000, WEAK_ET },
371 { 0xFF0E, 0x0000, WEAK_CS },
372 { 0xFF0F, 0x0000, WEAK_ES },
373 { 0xFF10, 0xFF19, WEAK_EN },
374 { 0xFF1A, 0x0000, WEAK_CS },
375 { 0xFF1B, 0xFF20, NEUTRAL_ON },
376 { 0xFF3B, 0xFF40, NEUTRAL_ON },
377 { 0xFF5B, 0xFF65, NEUTRAL_ON },
378 { 0xFFE0, 0xFFE1, WEAK_ET },
379 { 0xFFE2, 0xFFE4, NEUTRAL_ON },
380 { 0xFFE5, 0xFFE6, WEAK_ET },
381 { 0xFFE8, 0xFFEE, NEUTRAL_ON },
382 { 0xFFF9, 0xFFFB, WEAK_BN },
383 { 0xFFFC, 0xFFFD, NEUTRAL_ON },
384 { 0x1D167, 0x1D169, WEAK_NSM },
385 { 0x1D173, 0x1D17A, WEAK_BN },
386 { 0x1D17B, 0x1D182, WEAK_NSM },
387 { 0x1D185, 0x1D18B, WEAK_NSM },
388 { 0x1D1AA, 0x1D1AD, WEAK_NSM },
389 { 0x1D7CE, 0x1D7FF, WEAK_EN },
390 { 0xE0001, 0xE007F, WEAK_BN } };
391 int i;
392
393 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
394 staticpro (&bidi_type_table);
395
396 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
397 char_table_set_range (bidi_type_table, bidi_type[i].from,
398 bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
399 make_number (bidi_type[i].type));
400 bidi_initialized = 1;
401 }
402
403 static int
404 bidi_is_arabic_number (int ch)
405 {
406 return 0; /* FIXME! */
407 }
408
409 /* Return the bidi type of a character CH. */
410 bidi_type_t
411 bidi_get_type (int ch)
412 {
413 if (ch == BIDI_EOB)
414 return NEUTRAL_B;
415 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
416 }
417
418 void
419 bidi_check_type (bidi_type_t type)
420 {
421 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
422 abort ();
423 }
424
425 /* Given a bidi TYPE of a character, return its category. */
426 bidi_category_t
427 bidi_get_category (bidi_type_t type)
428 {
429 switch (type)
430 {
431 case UNKNOWN_BT:
432 return UNKNOWN_BC;
433 case STRONG_L:
434 case STRONG_R:
435 case STRONG_AL:
436 case LRE:
437 case LRO:
438 case RLE:
439 case RLO:
440 return STRONG;
441 case PDF: /* ??? really?? */
442 case WEAK_EN:
443 case WEAK_ES:
444 case WEAK_ET:
445 case WEAK_AN:
446 case WEAK_CS:
447 case WEAK_NSM:
448 case WEAK_BN:
449 return WEAK;
450 case NEUTRAL_B:
451 case NEUTRAL_S:
452 case NEUTRAL_WS:
453 case NEUTRAL_ON:
454 return NEUTRAL;
455 default:
456 abort ();
457 }
458 }
459
460 /* FIXME: exceedingly temporary! Should consult the Unicode database
461 of character properties. */
462 int
463 bidi_mirror_char (int c)
464 {
465 static const char mirrored_pairs[] = "()<>[]{}";
466 const char *p = strchr (mirrored_pairs, c);
467
468 if (p)
469 {
470 size_t i = p - mirrored_pairs;
471
472 if ((i & 1) == 0)
473 return mirrored_pairs[i + 1];
474 else
475 return mirrored_pairs[i - 1];
476 }
477 return c;
478 }
479
480 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
481 copies the part of the level stack that is actually in use. */
482 static inline void
483 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
484 {
485 int save_first_elt = to->first_elt;
486 int i;
487
488 /* Copy everything except the level stack. */
489 memcpy (to, from, ((int)&((struct bidi_it *)0)->level_stack[0]));
490 to->first_elt = save_first_elt;
491 if (to->first_elt != 0 && to->first_elt != 1)
492 to->first_elt = 0;
493
494 /* Copy the active part of the level stack. */
495 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
496 for (i = 1; i <= from->stack_idx; i++)
497 to->level_stack[i] = from->level_stack[i];
498 }
499
500 /* Caching the bidi iterator states. */
501
502 static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! */
503 static int bidi_cache_idx;
504 static int bidi_cache_last_idx;
505
506 static inline void
507 bidi_cache_reset (void)
508 {
509 bidi_cache_idx = 0;
510 bidi_cache_last_idx = -1;
511 }
512
513 static inline void
514 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
515 {
516 int current_scan_dir = bidi_it->scan_dir;
517
518 if (idx < 0 || idx >= bidi_cache_idx)
519 abort ();
520
521 bidi_copy_it (bidi_it, &bidi_cache[idx]);
522 bidi_it->scan_dir = current_scan_dir;
523 bidi_cache_last_idx = idx;
524 }
525
526 /* Find a cached state with a given CHARPOS and resolved embedding
527 level less or equal to LEVEL. if LEVEL is -1, disregard the
528 resolved levels in cached states. DIR, if non-zero, means search
529 in that direction from the last cache hit. */
530 static inline int
531 bidi_cache_search (int charpos, int level, int dir)
532 {
533 int i, i_start;
534
535 if (bidi_cache_idx)
536 {
537 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
538 dir = -1;
539 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
540 dir = 1;
541 if (dir)
542 i_start = bidi_cache_last_idx;
543 else
544 {
545 dir = -1;
546 i_start = bidi_cache_idx - 1;
547 }
548
549 if (dir < 0)
550 {
551 /* Linear search for now; FIXME! */
552 for (i = i_start; i >= 0; i--)
553 if (bidi_cache[i].charpos == charpos
554 && (level == -1 || bidi_cache[i].resolved_level <= level))
555 return i;
556 }
557 else
558 {
559 for (i = i_start; i < bidi_cache_idx; i++)
560 if (bidi_cache[i].charpos == charpos
561 && (level == -1 || bidi_cache[i].resolved_level <= level))
562 return i;
563 }
564 }
565
566 return -1;
567 }
568
569 /* Find a cached state where the resolved level changes to a value
570 that is lower than LEVEL, and return its cache slot index. DIR is
571 the direction to search, starting with the last used cache slot.
572 BEFORE, if non-zero, means return the index of the slot that is
573 ``before'' the level change in the search direction. That is,
574 given the cached levels like this:
575
576 1122333442211
577 AB C
578
579 and assuming we are at the position cached at the slot marked with
580 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
581 index of slot B or A, depending whether BEFORE is, respectively,
582 non-zero or zero. */
583 static int
584 bidi_cache_find_level_change (int level, int dir, int before)
585 {
586 if (bidi_cache_idx)
587 {
588 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
589 int incr = before ? 1 : 0;
590
591 if (!dir)
592 dir = -1;
593 else if (!incr)
594 i += dir;
595
596 if (dir < 0)
597 {
598 while (i >= incr)
599 {
600 if (bidi_cache[i - incr].resolved_level >= 0
601 && bidi_cache[i - incr].resolved_level < level)
602 return i;
603 i--;
604 }
605 }
606 else
607 {
608 while (i < bidi_cache_idx - incr)
609 {
610 if (bidi_cache[i + incr].resolved_level >= 0
611 && bidi_cache[i + incr].resolved_level < level)
612 return i;
613 i++;
614 }
615 }
616 }
617
618 return -1;
619 }
620
621 static inline void
622 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
623 {
624 int idx;
625
626 /* We should never cache on backward scans. */
627 if (bidi_it->scan_dir == -1)
628 abort ();
629 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
630
631 if (idx < 0)
632 {
633 idx = bidi_cache_idx;
634 if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
635 abort ();
636 bidi_copy_it (&bidi_cache[idx], bidi_it);
637 if (!resolved)
638 bidi_cache[idx].resolved_level = -1;
639 }
640 else
641 {
642 /* Copy only the members which could have changed, to avoid
643 costly copying of the entire struct. */
644 bidi_cache[idx].type = bidi_it->type;
645 bidi_check_type (bidi_it->type);
646 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
647 bidi_check_type (bidi_it->type_after_w1);
648 if (resolved)
649 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
650 else
651 bidi_cache[idx].resolved_level = -1;
652 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
653 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
654 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
655 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
656 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
657 }
658
659 bidi_cache_last_idx = idx;
660 if (idx >= bidi_cache_idx)
661 bidi_cache_idx = idx + 1;
662 }
663
664 static inline bidi_type_t
665 bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
666 {
667 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
668
669 if (i >= 0)
670 {
671 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
672
673 *bidi_it = bidi_cache[i];
674 bidi_cache_last_idx = i;
675 /* Don't let scan direction from from the cached state override
676 the current scan direction. */
677 bidi_it->scan_dir = current_scan_dir;
678 return bidi_it->type;
679 }
680
681 return UNKNOWN_BT;
682 }
683
684 static inline int
685 bidi_peek_at_next_level (struct bidi_it *bidi_it)
686 {
687 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
688 abort ();
689 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
690 }
691
692 /* Return non-zero if buffer's byte position POS is the last character
693 of a paragraph. THIS_CH is the character preceding the one at POS in
694 the buffer. */
695 int
696 bidi_at_paragraph_end (int this_ch, int pos)
697 {
698 int next_ch;
699
700 if (pos >= ZV_BYTE)
701 return 1;
702
703 next_ch = FETCH_CHAR (pos);
704 /* FIXME: This should support all Unicode characters that can end a
705 paragraph. */
706 return (this_ch == '\n' && next_ch == '\n');
707 }
708
709 /* Determine the start-of-run (sor) directional type given the two
710 embedding levels on either side of the run boundary. Also, update
711 the saved info about previously seen characters, since that info is
712 generally valid for a single level run. */
713 static inline void
714 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
715 {
716 int higher_level = level_before > level_after ? level_before : level_after;
717
718 /* The prev_was_pdf gork is required for when we have several PDFs
719 in a row. In that case, we want to compute the sor type for the
720 next level run only once: when we see the first PDF. That's
721 because the sor type depends only on the higher of the two levels
722 that we find on the two sides of the level boundary (see UAX#9,
723 clause X10), and so we don't need to know the final embedding
724 level to which we descend after processing all the PDFs. */
725 if (level_before < level_after || !bidi_it->prev_was_pdf)
726 /* FIXME: should the default sor direction be user selectable? */
727 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
728 if (level_before > level_after)
729 bidi_it->prev_was_pdf = 1;
730
731 bidi_it->prev.type = UNKNOWN_BT;
732 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
733 bidi_it->last_strong.orig_type = UNKNOWN_BT;
734 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
735 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
736 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
737 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
738 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
739 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
740 }
741
742 void
743 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
744 {
745 int pos = bidi_it->charpos, bytepos = bidi_it->bytepos;
746 int ch, ch_len;
747
748 /* We should never be called at EOB or before BEGV. */
749 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
750 abort ();
751
752 /* We should always be called at the beginning of a new
753 paragraph. */
754 if (!(bytepos == BEGV_BYTE
755 || FETCH_CHAR (bytepos) == '\n'
756 || FETCH_CHAR (bytepos - 1) == '\n'))
757 abort ();
758
759 ch = FETCH_CHAR (bytepos);
760 ch_len = CHAR_BYTES (ch);
761 bidi_it->level_stack[0].level = 0; /* default for L2R */
762 if (dir == R2L)
763 bidi_it->level_stack[0].level = 1;
764 else if (dir == NEUTRAL_DIR) /* P2 */
765 {
766 bidi_type_t type;
767
768 /* FIXME: should actually go to where the paragraph begins and
769 start the loop below from there, since UAX#9 says to find the
770 first strong directional character in the paragraph. */
771
772 for (type = bidi_get_type (ch), pos++, bytepos += ch_len;
773 /* NOTE: UAX#9 says to search only for L, AL, or R types of
774 characters, and ignore RLE, RLO, LRE, and LRO. However,
775 I'm not sure it makes sense to omit those 4; should try
776 with and without that to see the effect. */
777 (bidi_get_category (type) != STRONG)
778 || (bidi_ignore_explicit_marks_for_paragraph_level
779 && (type == RLE || type == RLO
780 || type == LRE || type == LRO));
781 type = bidi_get_type (ch))
782 {
783 if (type == NEUTRAL_B || bidi_at_paragraph_end (ch, bytepos))
784 break;
785 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
786 }
787 if (type == STRONG_R || type == STRONG_AL) /* P3 */
788 bidi_it->level_stack[0].level = 1;
789 }
790 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
791 bidi_it->resolved_level = bidi_it->level_stack[0].level;
792 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
793 bidi_it->invalid_levels = 0;
794 bidi_it->invalid_rl_levels = -1;
795 bidi_it->new_paragraph = 0;
796 bidi_it->next_en_pos = -1;
797 bidi_it->next_for_ws.type = UNKNOWN_BT;
798 bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */
799
800 bidi_cache_reset ();
801 }
802
803 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end,
804 and set the new paragraph flag in the iterator. */
805 static inline void
806 bidi_set_paragraph_end (struct bidi_it *bidi_it)
807 {
808 bidi_it->invalid_levels = 0;
809 bidi_it->invalid_rl_levels = -1;
810 bidi_it->stack_idx = 0;
811 bidi_it->resolved_level = bidi_it->level_stack[0].level;
812 bidi_it->new_paragraph = 1;
813 }
814
815 /* Initialize the bidi iterator from buffer position CHARPOS. */
816 void
817 bidi_init_it (int charpos, int bytepos, struct bidi_it *bidi_it)
818 {
819 if (! bidi_initialized)
820 bidi_initialize ();
821 bidi_set_paragraph_end (bidi_it);
822 bidi_it->first_elt = 1;
823 bidi_it->charpos = charpos;
824 bidi_it->bytepos = bytepos;
825 bidi_it->type = NEUTRAL_B;
826 bidi_it->type_after_w1 = UNKNOWN_BT;
827 bidi_it->orig_type = UNKNOWN_BT;
828 bidi_it->prev_was_pdf = 0;
829 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
830 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
831 bidi_it->last_strong.orig_type = UNKNOWN_BT;
832 bidi_it->next_for_neutral.charpos = -1;
833 bidi_it->next_for_neutral.type =
834 bidi_it->next_for_neutral.type_after_w1 =
835 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
836 bidi_it->prev_for_neutral.charpos = -1;
837 bidi_it->prev_for_neutral.type =
838 bidi_it->prev_for_neutral.type_after_w1 =
839 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
840 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
841 }
842
843 /* Push the current embedding level and override status; reset the
844 current level to LEVEL and the current override status to OVERRIDE. */
845 static inline void
846 bidi_push_embedding_level (struct bidi_it *bidi_it,
847 int level, bidi_dir_t override)
848 {
849 bidi_it->stack_idx++;
850 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
851 abort ();
852 bidi_it->level_stack[bidi_it->stack_idx].level = level;
853 bidi_it->level_stack[bidi_it->stack_idx].override = override;
854 }
855
856 /* Pop the embedding level and directional override status from the
857 stack, and return the new level. */
858 static inline int
859 bidi_pop_embedding_level (struct bidi_it *bidi_it)
860 {
861 /* UAX#9 says to ignore invalid PDFs. */
862 if (bidi_it->stack_idx > 0)
863 bidi_it->stack_idx--;
864 return bidi_it->level_stack[bidi_it->stack_idx].level;
865 }
866
867 /* Record in SAVED_INFO the information about the current character. */
868 static inline void
869 bidi_remember_char (struct bidi_saved_info *saved_info,
870 struct bidi_it *bidi_it)
871 {
872 saved_info->charpos = bidi_it->charpos;
873 saved_info->bytepos = bidi_it->bytepos;
874 saved_info->type = bidi_it->type;
875 bidi_check_type (bidi_it->type);
876 saved_info->type_after_w1 = bidi_it->type_after_w1;
877 bidi_check_type (bidi_it->type_after_w1);
878 saved_info->orig_type = bidi_it->orig_type;
879 bidi_check_type (bidi_it->orig_type);
880 }
881
882 /* Resolve the type of a neutral character according to the type of
883 surrounding strong text and the current embedding level. */
884 static inline bidi_type_t
885 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
886 {
887 /* N1: European and Arabic numbers are treated as though they were R. */
888 if (next_type == WEAK_EN || next_type == WEAK_AN)
889 next_type = STRONG_R;
890 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
891 prev_type = STRONG_R;
892
893 if (next_type == prev_type) /* N1 */
894 return next_type;
895 else if ((lev & 1) == 0) /* N2 */
896 return STRONG_L;
897 else
898 return STRONG_R;
899 }
900
901 static inline int
902 bidi_explicit_dir_char (int c)
903 {
904 /* FIXME: this should be replaced with a lookup table with suitable
905 bits set, like standard C ctype macros do. */
906 return (c == LRE_CHAR || c == LRO_CHAR
907 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
908 }
909
910 /* A helper function for bidi_resolve_explicit. It advances to the
911 next character in logical order and determines the new embedding
912 level and directional override, but does not take into account
913 empty embeddings. */
914 static int
915 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
916 {
917 int curchar;
918 bidi_type_t type;
919 int current_level;
920 int new_level;
921 bidi_dir_t override;
922
923 if (bidi_it->bytepos < BEGV_BYTE /* after reseat to BEGV? */
924 || bidi_it->first_elt)
925 {
926 bidi_it->first_elt = 0;
927 if (bidi_it->charpos < BEGV)
928 bidi_it->charpos = BEGV;
929 bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos);
930 }
931 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
932 {
933 bidi_it->charpos++;
934 if (bidi_it->ch_len == 0)
935 abort ();
936 bidi_it->bytepos += bidi_it->ch_len;
937 }
938
939 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
940 override = bidi_it->level_stack[bidi_it->stack_idx].override;
941 new_level = current_level;
942
943 /* in case it is a unibyte character (not yet implemented) */
944 /* _fetch_multibyte_char_len = 1; */
945 if (bidi_it->bytepos >= ZV_BYTE)
946 {
947 curchar = BIDI_EOB;
948 bidi_it->ch_len = 1;
949 }
950 else
951 {
952 curchar = FETCH_CHAR (bidi_it->bytepos);
953 bidi_it->ch_len = CHAR_BYTES (curchar);
954 }
955 bidi_it->ch = curchar;
956
957 type = bidi_get_type (curchar);
958 bidi_it->orig_type = type;
959 bidi_check_type (bidi_it->orig_type);
960
961 if (type != PDF)
962 bidi_it->prev_was_pdf = 0;
963
964 bidi_it->type_after_w1 = UNKNOWN_BT;
965
966 switch (type)
967 {
968 case RLE: /* X2 */
969 case RLO: /* X4 */
970 bidi_it->type_after_w1 = type;
971 bidi_check_type (bidi_it->type_after_w1);
972 type = WEAK_BN; /* X9/Retaining */
973 if (bidi_it->ignore_bn_limit <= 0)
974 {
975 if (current_level <= BIDI_MAXLEVEL - 4)
976 {
977 /* Compute the least odd embedding level greater than
978 the current level. */
979 new_level = ((current_level + 1) & ~1) + 1;
980 if (bidi_it->type_after_w1 == RLE)
981 override = NEUTRAL_DIR;
982 else
983 override = R2L;
984 if (current_level == BIDI_MAXLEVEL - 4)
985 bidi_it->invalid_rl_levels = 0;
986 bidi_push_embedding_level (bidi_it, new_level, override);
987 }
988 else
989 {
990 bidi_it->invalid_levels++;
991 /* See the commentary about invalid_rl_levels below. */
992 if (bidi_it->invalid_rl_levels < 0)
993 bidi_it->invalid_rl_levels = 0;
994 bidi_it->invalid_rl_levels++;
995 }
996 }
997 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
998 || bidi_it->next_en_pos > bidi_it->charpos)
999 type = WEAK_EN;
1000 break;
1001 case LRE: /* X3 */
1002 case LRO: /* X5 */
1003 bidi_it->type_after_w1 = type;
1004 bidi_check_type (bidi_it->type_after_w1);
1005 type = WEAK_BN; /* X9/Retaining */
1006 if (bidi_it->ignore_bn_limit <= 0)
1007 {
1008 if (current_level <= BIDI_MAXLEVEL - 5)
1009 {
1010 /* Compute the least even embedding level greater than
1011 the current level. */
1012 new_level = ((current_level + 2) & ~1);
1013 if (bidi_it->type_after_w1 == LRE)
1014 override = NEUTRAL_DIR;
1015 else
1016 override = L2R;
1017 bidi_push_embedding_level (bidi_it, new_level, override);
1018 }
1019 else
1020 {
1021 bidi_it->invalid_levels++;
1022 /* invalid_rl_levels counts invalid levels encountered
1023 while the embedding level was already too high for
1024 LRE/LRO, but not for RLE/RLO. That is because
1025 there may be exactly one PDF which we should not
1026 ignore even though invalid_levels is non-zero.
1027 invalid_rl_levels helps to know what PDF is
1028 that. */
1029 if (bidi_it->invalid_rl_levels >= 0)
1030 bidi_it->invalid_rl_levels++;
1031 }
1032 }
1033 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1034 || bidi_it->next_en_pos > bidi_it->charpos)
1035 type = WEAK_EN;
1036 break;
1037 case PDF: /* X7 */
1038 bidi_it->type_after_w1 = type;
1039 bidi_check_type (bidi_it->type_after_w1);
1040 type = WEAK_BN; /* X9/Retaining */
1041 if (bidi_it->ignore_bn_limit <= 0)
1042 {
1043 if (!bidi_it->invalid_rl_levels)
1044 {
1045 new_level = bidi_pop_embedding_level (bidi_it);
1046 bidi_it->invalid_rl_levels = -1;
1047 if (bidi_it->invalid_levels)
1048 bidi_it->invalid_levels--;
1049 /* else nothing: UAX#9 says to ignore invalid PDFs */
1050 }
1051 if (!bidi_it->invalid_levels)
1052 new_level = bidi_pop_embedding_level (bidi_it);
1053 else
1054 {
1055 bidi_it->invalid_levels--;
1056 bidi_it->invalid_rl_levels--;
1057 }
1058 }
1059 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1060 || bidi_it->next_en_pos > bidi_it->charpos)
1061 type = WEAK_EN;
1062 break;
1063 default:
1064 /* Nothing. */
1065 break;
1066 }
1067
1068 bidi_it->type = type;
1069 bidi_check_type (bidi_it->type);
1070
1071 return new_level;
1072 }
1073
1074 /* Given an iterator state in BIDI_IT, advance one character position
1075 in the buffer to the next character (in the logical order), resolve
1076 any explicit embeddings and directional overrides, and return the
1077 embedding level of the character after resolving explicit
1078 directives and ignoring empty embeddings. */
1079 static int
1080 bidi_resolve_explicit (struct bidi_it *bidi_it)
1081 {
1082 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1083 int new_level = bidi_resolve_explicit_1 (bidi_it);
1084
1085 if (prev_level < new_level
1086 && bidi_it->type == WEAK_BN
1087 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1088 && bidi_it->ch != BIDI_EOB /* not already at EOB */
1089 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1090 + bidi_it->ch_len)))
1091 {
1092 /* Avoid pushing and popping embedding levels if the level run
1093 is empty, as this breaks level runs where it shouldn't.
1094 UAX#9 removes all the explicit embedding and override codes,
1095 so empty embeddings disappear without a trace. We need to
1096 behave as if we did the same. */
1097 struct bidi_it saved_it;
1098 int level = prev_level;
1099
1100 bidi_copy_it (&saved_it, bidi_it);
1101
1102 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1103 + bidi_it->ch_len)))
1104 {
1105 level = bidi_resolve_explicit_1 (bidi_it);
1106 }
1107
1108 if (level == prev_level) /* empty embedding */
1109 saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1110 else /* this embedding is non-empty */
1111 saved_it.ignore_bn_limit = -1;
1112
1113 bidi_copy_it (bidi_it, &saved_it);
1114 if (bidi_it->ignore_bn_limit > 0)
1115 {
1116 /* We pushed a level, but we shouldn't have. Undo that. */
1117 if (!bidi_it->invalid_rl_levels)
1118 {
1119 new_level = bidi_pop_embedding_level (bidi_it);
1120 bidi_it->invalid_rl_levels = -1;
1121 if (bidi_it->invalid_levels)
1122 bidi_it->invalid_levels--;
1123 }
1124 if (!bidi_it->invalid_levels)
1125 new_level = bidi_pop_embedding_level (bidi_it);
1126 else
1127 {
1128 bidi_it->invalid_levels--;
1129 bidi_it->invalid_rl_levels--;
1130 }
1131 }
1132 }
1133
1134 /* For when the paragraph end is defined by anything other than a
1135 special Unicode character (a.k.a. ``higher protocols''). */
1136 if (bidi_it->type != NEUTRAL_B)
1137 if (bidi_at_paragraph_end (bidi_it->ch,
1138 bidi_it->bytepos + bidi_it->ch_len))
1139 bidi_it->type = NEUTRAL_B;
1140
1141 if (bidi_it->type == NEUTRAL_B) /* X8 */
1142 {
1143 bidi_set_paragraph_end (bidi_it);
1144 bidi_it->type_after_w1 = bidi_it->type; /* needed below and in L1 */
1145 bidi_check_type (bidi_it->type_after_w1);
1146 }
1147
1148 return new_level;
1149 }
1150
1151 /* Advance in the buffer, resolve weak types and return the type of
1152 the next character after weak type resolution. */
1153 bidi_type_t
1154 bidi_resolve_weak (struct bidi_it *bidi_it)
1155 {
1156 bidi_type_t type;
1157 bidi_dir_t override;
1158 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1159 int new_level = bidi_resolve_explicit (bidi_it);
1160 int next_char;
1161 bidi_type_t type_of_next;
1162 struct bidi_it saved_it;
1163
1164 type = bidi_it->type;
1165 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1166
1167 if (type == UNKNOWN_BT
1168 || type == LRE
1169 || type == LRO
1170 || type == RLE
1171 || type == RLO
1172 || type == PDF)
1173 abort ();
1174
1175 if (new_level != prev_level
1176 || bidi_it->type == NEUTRAL_B)
1177 {
1178 /* We've got a new embedding level run, compute the directional
1179 type of sor and initialize per-run variables (UAX#9, clause
1180 X10). */
1181 bidi_set_sor_type (bidi_it, prev_level, new_level);
1182 }
1183 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1184 || type == WEAK_BN || type == STRONG_AL)
1185 bidi_it->type_after_w1 = type; /* needed in L1 */
1186 bidi_check_type (bidi_it->type_after_w1);
1187
1188 /* Level and directional override status are already recorded in
1189 bidi_it, and do not need any change; see X6. */
1190 if (override == R2L) /* X6 */
1191 type = STRONG_R;
1192 else if (override == L2R)
1193 type = STRONG_L;
1194 else if (type == STRONG_AL)
1195 type = STRONG_R; /* W3 */
1196 else if (type == WEAK_NSM) /* W1 */
1197 {
1198 /* Note that we don't need to consider the case where the prev
1199 character has its type overridden by an RLO or LRO: such
1200 characters are outside the current level run, and thus not
1201 relevant to this NSM. Thus, NSM gets the orig_type of the
1202 previous character. */
1203 if (bidi_it->prev.type != UNKNOWN_BT)
1204 type = bidi_it->prev.orig_type;
1205 else if (bidi_it->sor == R2L)
1206 type = STRONG_R;
1207 else if (bidi_it->sor == L2R)
1208 type = STRONG_L;
1209 else /* shouldn't happen! */
1210 abort ();
1211 if (type == WEAK_EN /* W2 after W1 */
1212 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1213 type = WEAK_AN;
1214 }
1215 else if (type == WEAK_EN /* W2 */
1216 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1217 type = WEAK_AN;
1218 else if ((type == WEAK_ES
1219 && (bidi_it->prev.type_after_w1 == WEAK_EN /* W4 */
1220 && (bidi_it->prev.orig_type == WEAK_EN
1221 || bidi_it->prev.orig_type == WEAK_NSM))) /* aft W1 */
1222 || (type == WEAK_CS
1223 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1224 && (bidi_it->prev.orig_type == WEAK_EN /* W4 */
1225 || bidi_it->prev.orig_type == WEAK_NSM)) /* a/W1 */
1226 || bidi_it->prev.type_after_w1 == WEAK_AN))) /* W4 */
1227 {
1228 next_char =
1229 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1230 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1231 type_of_next = bidi_get_type (next_char);
1232
1233 if (type_of_next == WEAK_BN
1234 || bidi_explicit_dir_char (next_char))
1235 {
1236 bidi_copy_it (&saved_it, bidi_it);
1237 while (bidi_resolve_explicit (bidi_it) == new_level
1238 && bidi_it->type == WEAK_BN)
1239 ;
1240 type_of_next = bidi_it->type;
1241 bidi_copy_it (bidi_it, &saved_it);
1242 }
1243
1244 /* If the next character is EN, but the last strong-type
1245 character is AL, that next EN will be changed to AN when we
1246 process it in W2 above. So in that case, this ES should not
1247 be changed into EN. */
1248 if (type == WEAK_ES
1249 && type_of_next == WEAK_EN
1250 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1251 type = WEAK_EN;
1252 else if (type == WEAK_CS)
1253 {
1254 if (bidi_it->prev.type_after_w1 == WEAK_AN
1255 && (type_of_next == WEAK_AN
1256 /* If the next character is EN, but the last
1257 strong-type character is AL, EN will be later
1258 changed to AN when we process it in W2 above. So
1259 in that case, this ES should not be changed into
1260 EN. */
1261 || (type_of_next == WEAK_EN
1262 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1263 type = WEAK_AN;
1264 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1265 && type_of_next == WEAK_EN
1266 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1267 type = WEAK_EN;
1268 }
1269 }
1270 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1271 || type == WEAK_BN) /* W5/Retaining */
1272 {
1273 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN with EN before it */
1274 || bidi_it->next_en_pos > bidi_it->charpos)
1275 type = WEAK_EN;
1276 /* W5: ET with EN after it. */
1277 else
1278 {
1279 int en_pos = bidi_it->charpos + 1;
1280
1281 next_char =
1282 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1283 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1284 type_of_next = bidi_get_type (next_char);
1285
1286 if (type_of_next == WEAK_ET
1287 || type_of_next == WEAK_BN
1288 || bidi_explicit_dir_char (next_char))
1289 {
1290 bidi_copy_it (&saved_it, bidi_it);
1291 while (bidi_resolve_explicit (bidi_it) == new_level
1292 && (bidi_it->type == WEAK_BN || bidi_it->type == WEAK_ET))
1293 ;
1294 type_of_next = bidi_it->type;
1295 en_pos = bidi_it->charpos;
1296 bidi_copy_it (bidi_it, &saved_it);
1297 }
1298 if (type_of_next == WEAK_EN)
1299 {
1300 /* If the last strong character is AL, the EN we've
1301 found will become AN when we get to it (W2). */
1302 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1303 {
1304 type = WEAK_EN;
1305 /* Remember this EN position, to speed up processing
1306 of the next ETs. */
1307 bidi_it->next_en_pos = en_pos;
1308 }
1309 else if (type == WEAK_BN)
1310 type = NEUTRAL_ON; /* W6/Retaining */
1311 }
1312 }
1313 }
1314
1315 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1316 || (type == WEAK_BN
1317 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1318 || bidi_it->prev.type_after_w1 == WEAK_ES
1319 || bidi_it->prev.type_after_w1 == WEAK_ET)))
1320 type = NEUTRAL_ON;
1321
1322 /* Store the type we've got so far, before we clobber it with strong
1323 types in W7 and while resolving neutral types. But leave alone
1324 the original types that were recorded above, because we will need
1325 them for the L1 clause. */
1326 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1327 bidi_it->type_after_w1 = type;
1328 bidi_check_type (bidi_it->type_after_w1);
1329
1330 if (type == WEAK_EN) /* W7 */
1331 {
1332 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1333 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1334 type = STRONG_L;
1335 }
1336
1337 bidi_it->type = type;
1338 bidi_check_type (bidi_it->type);
1339 return type;
1340 }
1341
1342 bidi_type_t
1343 bidi_resolve_neutral (struct bidi_it *bidi_it)
1344 {
1345 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1346 bidi_type_t type = bidi_resolve_weak (bidi_it);
1347 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1348
1349 if (!(type == STRONG_R
1350 || type == STRONG_L
1351 || type == WEAK_BN
1352 || type == WEAK_EN
1353 || type == WEAK_AN
1354 || type == NEUTRAL_B
1355 || type == NEUTRAL_S
1356 || type == NEUTRAL_WS
1357 || type == NEUTRAL_ON))
1358 abort ();
1359
1360 if (bidi_get_category (type) == NEUTRAL
1361 || (type == WEAK_BN && prev_level == current_level))
1362 {
1363 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1364 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1365 bidi_it->next_for_neutral.type,
1366 current_level);
1367 else
1368 {
1369 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1370 the assumption of batch-style processing; see clauses W4,
1371 W5, and especially N1, which require to look far forward
1372 (as well as back) in the buffer. May the fleas of a
1373 thousand camels infest the armpits of those who design
1374 supposedly general-purpose algorithms by looking at their
1375 own implementations, and fail to consider other possible
1376 implementations! */
1377 struct bidi_it saved_it;
1378 bidi_type_t next_type;
1379
1380 if (bidi_it->scan_dir == -1)
1381 abort ();
1382
1383 bidi_copy_it (&saved_it, bidi_it);
1384 /* Scan the text forward until we find the first non-neutral
1385 character, and then use that to resolve the neutral we
1386 are dealing with now. We also cache the scanned iterator
1387 states, to salvage some of the effort later. */
1388 bidi_cache_iterator_state (bidi_it, 0);
1389 do {
1390 /* Record the info about the previous character, so that
1391 it will be cached below with this state. */
1392 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1393 && bidi_it->type != WEAK_BN)
1394 bidi_remember_char (&bidi_it->prev, bidi_it);
1395 type = bidi_resolve_weak (bidi_it);
1396 /* Paragraph separators have their levels fully resolved
1397 at this point, so cache them as resolved. */
1398 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1399 /* FIXME: implement L1 here, by testing for a newline and
1400 resetting the level for any sequence of whitespace
1401 characters adjacent to it. */
1402 } while (!(type == NEUTRAL_B
1403 || (type != WEAK_BN
1404 && bidi_get_category (type) != NEUTRAL)
1405 /* This is all per level run, so stop when we
1406 reach the end of this level run. */
1407 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1408 current_level));
1409
1410 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1411
1412 switch (type)
1413 {
1414 case STRONG_L:
1415 case STRONG_R:
1416 case STRONG_AL:
1417 next_type = type;
1418 break;
1419 case WEAK_EN:
1420 case WEAK_AN:
1421 /* N1: ``European and Arabic numbers are treated as
1422 though they were R.'' */
1423 next_type = STRONG_R;
1424 saved_it.next_for_neutral.type = STRONG_R;
1425 break;
1426 case WEAK_BN:
1427 if (!bidi_explicit_dir_char (bidi_it->ch))
1428 abort (); /* can't happen: BNs are skipped */
1429 /* FALLTHROUGH */
1430 case NEUTRAL_B:
1431 /* Marched all the way to the end of this level run.
1432 We need to use the eor type, whose information is
1433 stored by bidi_set_sor_type in the prev_for_neutral
1434 member. */
1435 if (saved_it.type != WEAK_BN
1436 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1437 {
1438 next_type = bidi_it->prev_for_neutral.type;
1439 saved_it.next_for_neutral.type = next_type;
1440 bidi_check_type (next_type);
1441 }
1442 else
1443 {
1444 /* This is a BN which does not adjoin neutrals.
1445 Leave its type alone. */
1446 bidi_copy_it (bidi_it, &saved_it);
1447 return bidi_it->type;
1448 }
1449 break;
1450 default:
1451 abort ();
1452 }
1453 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1454 next_type, current_level);
1455 saved_it.type = type;
1456 bidi_check_type (type);
1457 bidi_copy_it (bidi_it, &saved_it);
1458 }
1459 }
1460 return type;
1461 }
1462
1463 /* Given an iterator state in BIDI_IT, advance one character position
1464 in the buffer to the next character (in the logical order), resolve
1465 the bidi type of that next character, and return that type. */
1466 bidi_type_t
1467 bidi_type_of_next_char (struct bidi_it *bidi_it)
1468 {
1469 bidi_type_t type;
1470
1471 /* This should always be called during a forward scan. */
1472 if (bidi_it->scan_dir != 1)
1473 abort ();
1474
1475 /* Reset the limit until which to ignore BNs if we step out of the
1476 area where we found only empty levels. */
1477 if ((bidi_it->ignore_bn_limit > 0
1478 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1479 || (bidi_it->ignore_bn_limit == -1
1480 && !bidi_explicit_dir_char (bidi_it->ch)))
1481 bidi_it->ignore_bn_limit = 0;
1482
1483 type = bidi_resolve_neutral (bidi_it);
1484
1485 return type;
1486 }
1487
1488 /* Given an iterator state BIDI_IT, advance one character position in
1489 the buffer to the next character (in the logical order), resolve
1490 the embedding and implicit levels of that next character, and
1491 return the resulting level. */
1492 int
1493 bidi_level_of_next_char (struct bidi_it *bidi_it)
1494 {
1495 bidi_type_t type;
1496 int level, prev_level = -1;
1497 struct bidi_saved_info next_for_neutral;
1498
1499 if (bidi_it->scan_dir == 1)
1500 {
1501 /* There's no sense in trying to advance if we hit end of text. */
1502 if (bidi_it->ch == BIDI_EOB)
1503 return bidi_it->resolved_level;
1504
1505 /* Record the info about the previous character. */
1506 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1507 && bidi_it->type != WEAK_BN)
1508 bidi_remember_char (&bidi_it->prev, bidi_it);
1509 if (bidi_it->type_after_w1 == STRONG_R
1510 || bidi_it->type_after_w1 == STRONG_L
1511 || bidi_it->type_after_w1 == STRONG_AL)
1512 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1513 /* FIXME: it sounds like we don't need both prev and
1514 prev_for_neutral members, but I'm leaving them both for now. */
1515 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1516 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1517 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1518
1519 /* If we overstepped the characters used for resolving neutrals
1520 and whitespace, invalidate their info in the iterator. */
1521 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1522 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1523 if (bidi_it->next_en_pos >= 0
1524 && bidi_it->charpos >= bidi_it->next_en_pos)
1525 bidi_it->next_en_pos = -1;
1526 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1527 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1528 bidi_it->next_for_ws.type = UNKNOWN_BT;
1529
1530 /* This must be taken before we fill the iterator with the info
1531 about the next char. If we scan backwards, the iterator
1532 state must be already cached, so there's no need to know the
1533 embedding level of the previous character, since we will be
1534 returning to our caller shortly. */
1535 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1536 }
1537 next_for_neutral = bidi_it->next_for_neutral;
1538
1539 /* Perhaps it is already cached. */
1540 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1541 if (type != UNKNOWN_BT)
1542 {
1543 /* Don't lose the information for resolving neutrals! The
1544 cached states could have been cached before their
1545 next_for_neutral member was computed. If we are on our way
1546 forward, we can simply take the info from the previous
1547 state. */
1548 if (bidi_it->scan_dir == 1
1549 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1550 bidi_it->next_for_neutral = next_for_neutral;
1551
1552 /* If resolved_level is -1, it means this state was cached
1553 before it was completely resolved, so we cannot return
1554 it. */
1555 if (bidi_it->resolved_level != -1)
1556 return bidi_it->resolved_level;
1557 }
1558 if (bidi_it->scan_dir == -1)
1559 /* If we are going backwards, the iterator state is already cached
1560 from previous scans, and should be fully resolved. */
1561 abort ();
1562
1563 if (type == UNKNOWN_BT)
1564 type = bidi_type_of_next_char (bidi_it);
1565
1566 if (type == NEUTRAL_B)
1567 return bidi_it->resolved_level;
1568
1569 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1570 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1571 || (type == WEAK_BN && prev_level == level))
1572 {
1573 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1574 abort ();
1575
1576 /* If the cached state shows a neutral character, it was not
1577 resolved by bidi_resolve_neutral, so do it now. */
1578 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1579 bidi_it->next_for_neutral.type,
1580 level);
1581 }
1582
1583 if (!(type == STRONG_R
1584 || type == STRONG_L
1585 || type == WEAK_BN
1586 || type == WEAK_EN
1587 || type == WEAK_AN))
1588 abort ();
1589 bidi_it->type = type;
1590 bidi_check_type (bidi_it->type);
1591
1592 /* For L1 below, we need to know, for each WS character, whether
1593 it belongs to a sequence of WS characters preceeding a newline
1594 or a TAB or a paragraph separator. */
1595 if (bidi_it->orig_type == NEUTRAL_WS
1596 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1597 {
1598 int ch;
1599 int clen = bidi_it->ch_len;
1600 int bpos = bidi_it->bytepos;
1601 int cpos = bidi_it->charpos;
1602 bidi_type_t chtype;
1603
1604 do {
1605 /*_fetch_multibyte_char_len = 1;*/
1606 ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
1607 bpos += clen;
1608 cpos++;
1609 clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1610 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1611 chtype = NEUTRAL_B;
1612 else
1613 chtype = bidi_get_type (ch);
1614 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1615 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1616 bidi_it->next_for_ws.type = chtype;
1617 bidi_check_type (bidi_it->next_for_ws.type);
1618 bidi_it->next_for_ws.charpos = cpos;
1619 bidi_it->next_for_ws.bytepos = bpos;
1620 }
1621
1622 /* Resolve implicit levels, with a twist: PDFs get the embedding
1623 level of the enbedding they terminate. See below for the
1624 reason. */
1625 if (bidi_it->orig_type == PDF
1626 /* Don't do this if this formatting code didn't change the
1627 embedding level due to invalid or empty embeddings. */
1628 && prev_level != level)
1629 {
1630 /* Don't look in UAX#9 for the reason for this: it's our own
1631 private quirk. The reason is that we want the formatting
1632 codes to be delivered so that they bracket the text of their
1633 embedding. For example, given the text
1634
1635 {RLO}teST{PDF}
1636
1637 we want it to be displayed as
1638
1639 {RLO}STet{PDF}
1640
1641 not as
1642
1643 STet{RLO}{PDF}
1644
1645 which will result because we bump up the embedding level as
1646 soon as we see the RLO and pop it as soon as we see the PDF,
1647 so RLO itself has the same embedding level as "teST", and
1648 thus would be normally delivered last, just before the PDF.
1649 The switch below fiddles with the level of PDF so that this
1650 ugly side effect does not happen.
1651
1652 (This is, of course, only important if the formatting codes
1653 are actually displayed, but Emacs does need to display them
1654 if the user wants to.) */
1655 level = prev_level;
1656 }
1657 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1658 || bidi_it->orig_type == NEUTRAL_S
1659 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1660 /* || bidi_it->ch == LINESEP_CHAR */
1661 || (bidi_it->orig_type == NEUTRAL_WS
1662 && (bidi_it->next_for_ws.type == NEUTRAL_B
1663 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1664 level = bidi_it->level_stack[0].level;
1665 else if ((level & 1) == 0) /* I1 */
1666 {
1667 if (type == STRONG_R)
1668 level++;
1669 else if (type == WEAK_EN || type == WEAK_AN)
1670 level += 2;
1671 }
1672 else /* I2 */
1673 {
1674 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1675 level++;
1676 }
1677
1678 bidi_it->resolved_level = level;
1679 return level;
1680 }
1681
1682 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
1683 non-zero, we are at the end of a level, and we need to prepare to
1684 resume the scan of the lower level.
1685
1686 If this level's other edge is cached, we simply jump to it, filling
1687 the iterator structure with the iterator state on the other edge.
1688 Otherwise, we walk the buffer until we come back to the same level
1689 as LEVEL.
1690
1691 Note: we are not talking here about a ``level run'' in the UAX#9
1692 sense of the term, but rather about a ``level'' which includes
1693 all the levels higher than it. In other words, given the levels
1694 like this:
1695
1696 11111112222222333333334443343222222111111112223322111
1697 A B C
1698
1699 and assuming we are at point A scanning left to right, this
1700 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1701 at point B. */
1702 static void
1703 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1704 {
1705 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1706 int idx;
1707
1708 /* Try the cache first. */
1709 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1710 bidi_cache_fetch_state (idx, bidi_it);
1711 else
1712 {
1713 int new_level;
1714
1715 if (end_flag)
1716 abort (); /* if we are at end of level, its edges must be cached */
1717
1718 bidi_cache_iterator_state (bidi_it, 1);
1719 do {
1720 new_level = bidi_level_of_next_char (bidi_it);
1721 bidi_cache_iterator_state (bidi_it, 1);
1722 } while (new_level >= level);
1723 }
1724 }
1725
1726 void
1727 bidi_get_next_char_visually (struct bidi_it *bidi_it)
1728 {
1729 int old_level, new_level, next_level;
1730 struct bidi_it sentinel;
1731
1732 if (bidi_it->scan_dir == 0)
1733 {
1734 bidi_it->scan_dir = 1; /* default to logical order */
1735 }
1736
1737 if (bidi_it->new_paragraph)
1738 bidi_paragraph_init (bidi_overriding_paragraph_direction, bidi_it);
1739 /* Prepare the sentinel iterator state. */
1740 if (bidi_cache_idx == 0)
1741 {
1742 bidi_copy_it (&sentinel, bidi_it);
1743 if (bidi_it->first_elt)
1744 {
1745 sentinel.charpos--; /* cached charpos needs to be monotonic */
1746 sentinel.bytepos--;
1747 sentinel.ch = '\n'; /* doesn't matter, but why not? */
1748 sentinel.ch_len = 1;
1749 }
1750 }
1751
1752 old_level = bidi_it->resolved_level;
1753 new_level = bidi_level_of_next_char (bidi_it);
1754 if (bidi_it->ch == BIDI_EOB)
1755 return;
1756
1757 /* Reordering of resolved levels (clause L2) is implemented by
1758 jumping to the other edge of the level and flipping direction of
1759 scanning the buffer whenever we find a level change. */
1760 if (new_level != old_level)
1761 {
1762 int ascending = new_level > old_level;
1763 int level_to_search = ascending ? old_level + 1 : old_level;
1764 int incr = ascending ? 1 : -1;
1765 int expected_next_level = old_level + incr;
1766
1767 /* If we don't have anything cached yet, we need to cache the
1768 sentinel state, since we'll need it to record where to jump
1769 when the last non-base level is exhausted. */
1770 if (bidi_cache_idx == 0)
1771 bidi_cache_iterator_state (&sentinel, 1);
1772 /* Jump (or walk) to the other edge of this level. */
1773 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1774 /* Switch scan direction and peek at the next character in the
1775 new direction. */
1776 bidi_it->scan_dir = -bidi_it->scan_dir;
1777
1778 /* The following loop handles the case where the resolved level
1779 jumps by more than one. This is typical for numbers inside a
1780 run of text with left-to-right embedding direction, but can
1781 also happen in other situations. In those cases the decision
1782 where to continue after a level change, and in what direction,
1783 is tricky. For example, given a text like below:
1784
1785 abcdefgh
1786 11336622
1787
1788 (where the numbers below the text show the resolved levels),
1789 the result of reordering according to UAX#9 should be this:
1790
1791 efdcghba
1792
1793 This is implemented by the loop below which flips direction
1794 and jumps to the other edge of the level each time it finds
1795 the new level not to be the expected one. The expected level
1796 is always one more or one less than the previous one. */
1797 next_level = bidi_peek_at_next_level (bidi_it);
1798 while (next_level != expected_next_level)
1799 {
1800 expected_next_level += incr;
1801 level_to_search += incr;
1802 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1803 bidi_it->scan_dir = -bidi_it->scan_dir;
1804 next_level = bidi_peek_at_next_level (bidi_it);
1805 }
1806
1807 /* Finally, deliver the next character in the new direction. */
1808 next_level = bidi_level_of_next_char (bidi_it);
1809 }
1810
1811 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1812 {
1813 /* If we are at paragraph's base embedding level and beyond the
1814 last cached position, the cache's job is done and we can
1815 discard it. */
1816 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1817 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1818 bidi_cache_reset ();
1819 /* But as long as we are caching during forward scan, we must
1820 cache each state, or else the cache integrity will be
1821 compromised: it assumes cached states correspond to buffer
1822 positions 1:1. */
1823 else
1824 bidi_cache_iterator_state (bidi_it, 1);
1825 }
1826 }
1827
1828 /* This is meant to be called from within the debugger, whenever you
1829 wish to examine the cache contents. */
1830 void
1831 bidi_dump_cached_states (void)
1832 {
1833 int i;
1834 int ndigits = 1;
1835
1836 if (bidi_cache_idx == 0)
1837 {
1838 fprintf (stderr, "The cache is empty.\n");
1839 return;
1840 }
1841 fprintf (stderr, "Total of %d state%s in cache:\n",
1842 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1843
1844 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1845 ndigits++;
1846 fputs ("ch ", stderr);
1847 for (i = 0; i < bidi_cache_idx; i++)
1848 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1849 fputs ("\n", stderr);
1850 fputs ("lvl ", stderr);
1851 for (i = 0; i < bidi_cache_idx; i++)
1852 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1853 fputs ("\n", stderr);
1854 fputs ("pos ", stderr);
1855 for (i = 0; i < bidi_cache_idx; i++)
1856 fprintf (stderr, "%*d", ndigits, bidi_cache[i].charpos);
1857 fputs ("\n", stderr);
1858 }