b146d562c37dbcb0eaecaf3ab2c3fd1f470c8df7
[bpt/emacs.git] / src / bidi.c
1 /* Low-level bidirectional buffer-scanning functions for GNU Emacs.
2 Copyright (C) 2000, 2001, 2004, 2005 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 /* Written by Eli Zaretskii <eliz@gnu.org>.
23
24 A sequential implementation of the Unicode Bidirectional algorithm,
25 as per UAX#9, a part of the Unicode Standard.
26
27 Unlike the reference and most other implementations, this one is
28 designed to be called once for every character in the buffer.
29
30 The main entry point is bidi_get_next_char_visually. Each time it
31 is called, it finds the next character in the visual order, and
32 returns its information in a special structure. The caller is then
33 expected to process this character for display or any other
34 purposes, and call bidi_get_next_char_visually for the next
35 character. See the comments in bidi_get_next_char_visually for
36 more details about its algorithm that finds the next visual-order
37 character by resolving their levels on the fly.
38
39 If you want to understand the code, you will have to read it
40 together with the relevant portions of UAX#9. The comments include
41 references to UAX#9 rules, for that very reason.
42
43 A note about references to UAX#9 rules: if the reference says
44 something like "X9/Retaining", it means that you need to refer to
45 rule X9 and to its modifications decribed in the "Implementation
46 Notes" section of UAX#9, under "Retaining Format Codes". */
47
48 #ifdef HAVE_CONFIG_H
49 #include <config.h>
50 #endif
51
52 #include <stdio.h>
53
54 #ifdef HAVE_STRING_H
55 #include <string.h>
56 #endif
57
58 #include "lisp.h"
59 #include "buffer.h"
60 #include "character.h"
61 #include "dispextern.h"
62
63 static int bidi_initialized = 0;
64
65 static Lisp_Object bidi_type_table;
66
67 /* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */
68 #define LRM_CHAR 0x200E
69 #define RLM_CHAR 0x200F
70 #define LRE_CHAR 0x202A
71 #define RLE_CHAR 0x202B
72 #define PDF_CHAR 0x202C
73 #define LRO_CHAR 0x202D
74 #define RLO_CHAR 0x202E
75
76 #define BIDI_EOB -1
77 #define BIDI_BOB -2 /* FIXME: Is this needed? */
78
79 /* Local data structures. (Look in dispextern.h for the rest.) */
80
81 /* What we need to know about the current paragraph. */
82 struct bidi_paragraph_info {
83 int start_bytepos; /* byte position where it begins */
84 int end_bytepos; /* byte position where it ends */
85 int embedding_level; /* its basic embedding level */
86 bidi_dir_t base_dir; /* its base direction */
87 };
88
89 /* Data type for describing the bidirectional character categories. */
90 typedef enum {
91 UNKNOWN_BC,
92 NEUTRAL,
93 WEAK,
94 STRONG
95 } bidi_category_t;
96
97 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
98
99 /* FIXME: Should be user-definable. */
100 bidi_dir_t bidi_overriding_paragraph_direction = L2R;
101
102 /* FIXME: Unused? */
103 #define ASCII_BIDI_TYPE_SET(STR, TYPE) \
104 do { \
105 unsigned char *p; \
106 for (p = (STR); *p; p++) \
107 CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \
108 } while (0)
109
110 static void
111 bidi_initialize ()
112 {
113 /* FIXME: This should come from the Unicode Database. */
114 struct {
115 int from, to;
116 bidi_type_t type;
117 } bidi_type[] =
118 { { 0x0000, 0x0008, WEAK_BN },
119 { 0x0009, 0x0000, NEUTRAL_S },
120 { 0x000A, 0x0000, NEUTRAL_B },
121 { 0x000B, 0x0000, NEUTRAL_S },
122 { 0x000C, 0x0000, NEUTRAL_WS },
123 { 0x000D, 0x0000, NEUTRAL_B },
124 { 0x000E, 0x001B, WEAK_BN },
125 { 0x001C, 0x001E, NEUTRAL_B },
126 { 0x001F, 0x0000, NEUTRAL_S },
127 { 0x0020, 0x0000, NEUTRAL_WS },
128 { 0x0021, 0x0022, NEUTRAL_ON },
129 { 0x0023, 0x0025, WEAK_ET },
130 { 0x0026, 0x002A, NEUTRAL_ON },
131 { 0x002B, 0x0000, WEAK_ET },
132 { 0x002C, 0x0000, WEAK_CS },
133 { 0x002D, 0x0000, WEAK_ET },
134 { 0x002E, 0x0000, WEAK_CS },
135 { 0x002F, 0x0000, WEAK_ES },
136 { 0x0030, 0x0039, WEAK_EN },
137 { 0x003A, 0x0000, WEAK_CS },
138 { 0x003B, 0x0040, NEUTRAL_ON },
139 { 0x005B, 0x0060, NEUTRAL_ON },
140 { 0x007B, 0x007E, NEUTRAL_ON },
141 { 0x007F, 0x0084, WEAK_BN },
142 { 0x0085, 0x0000, NEUTRAL_B },
143 { 0x0086, 0x009F, WEAK_BN },
144 { 0x00A0, 0x0000, WEAK_CS },
145 { 0x00A1, 0x0000, NEUTRAL_ON },
146 { 0x00A2, 0x00A5, WEAK_ET },
147 { 0x00A6, 0x00A9, NEUTRAL_ON },
148 { 0x00AB, 0x00AF, NEUTRAL_ON },
149 { 0x00B0, 0x00B1, WEAK_ET },
150 { 0x00B2, 0x00B3, WEAK_EN },
151 { 0x00B4, 0x0000, NEUTRAL_ON },
152 { 0x00B6, 0x00B8, NEUTRAL_ON },
153 { 0x00B9, 0x0000, WEAK_EN },
154 { 0x00BB, 0x00BF, NEUTRAL_ON },
155 { 0x00D7, 0x0000, NEUTRAL_ON },
156 { 0x00F7, 0x0000, NEUTRAL_ON },
157 { 0x02B9, 0x02BA, NEUTRAL_ON },
158 { 0x02C2, 0x02CF, NEUTRAL_ON },
159 { 0x02D2, 0x02DF, NEUTRAL_ON },
160 { 0x02E5, 0x02ED, NEUTRAL_ON },
161 { 0x0300, 0x036F, WEAK_NSM },
162 { 0x0374, 0x0375, NEUTRAL_ON },
163 { 0x037E, 0x0385, NEUTRAL_ON },
164 { 0x0387, 0x0000, NEUTRAL_ON },
165 { 0x03F6, 0x0000, NEUTRAL_ON },
166 { 0x0483, 0x0489, WEAK_NSM },
167 { 0x058A, 0x0000, NEUTRAL_ON },
168 { 0x0591, 0x05BD, WEAK_NSM },
169 { 0x05BE, 0x0000, STRONG_R },
170 { 0x05BF, 0x0000, WEAK_NSM },
171 { 0x05C0, 0x0000, STRONG_R },
172 { 0x05C1, 0x05C2, WEAK_NSM },
173 { 0x05C3, 0x0000, STRONG_R },
174 { 0x05C4, 0x0000, WEAK_NSM },
175 { 0x05D0, 0x05F4, STRONG_R },
176 { 0x060C, 0x0000, WEAK_CS },
177 { 0x061B, 0x064A, STRONG_AL },
178 { 0x064B, 0x0655, WEAK_NSM },
179 { 0x0660, 0x0669, WEAK_AN },
180 { 0x066A, 0x0000, WEAK_ET },
181 { 0x066B, 0x066C, WEAK_AN },
182 { 0x066D, 0x066F, STRONG_AL },
183 { 0x0670, 0x0000, WEAK_NSM },
184 { 0x0671, 0x06D5, STRONG_AL },
185 { 0x06D6, 0x06DC, WEAK_NSM },
186 { 0x06DD, 0x0000, STRONG_AL },
187 { 0x06DE, 0x06E4, WEAK_NSM },
188 { 0x06E5, 0x06E6, STRONG_AL },
189 { 0x06E7, 0x06E8, WEAK_NSM },
190 { 0x06E9, 0x0000, NEUTRAL_ON },
191 { 0x06EA, 0x06ED, WEAK_NSM },
192 { 0x06F0, 0x06F9, WEAK_EN },
193 { 0x06FA, 0x070D, STRONG_AL },
194 { 0x070F, 0x0000, WEAK_BN },
195 { 0x0710, 0x0000, STRONG_AL },
196 { 0x0711, 0x0000, WEAK_NSM },
197 { 0x0712, 0x072C, STRONG_AL },
198 { 0x0730, 0x074A, WEAK_NSM },
199 { 0x0780, 0x07A5, STRONG_AL },
200 { 0x07A6, 0x07B0, WEAK_NSM },
201 { 0x07B1, 0x0000, STRONG_AL },
202 { 0x0901, 0x0902, WEAK_NSM },
203 { 0x093C, 0x0000, WEAK_NSM },
204 { 0x0941, 0x0948, WEAK_NSM },
205 { 0x094D, 0x0000, WEAK_NSM },
206 { 0x0951, 0x0954, WEAK_NSM },
207 { 0x0962, 0x0963, WEAK_NSM },
208 { 0x0981, 0x0000, WEAK_NSM },
209 { 0x09BC, 0x0000, WEAK_NSM },
210 { 0x09C1, 0x09C4, WEAK_NSM },
211 { 0x09CD, 0x0000, WEAK_NSM },
212 { 0x09E2, 0x09E3, WEAK_NSM },
213 { 0x09F2, 0x09F3, WEAK_ET },
214 { 0x0A02, 0x0000, WEAK_NSM },
215 { 0x0A3C, 0x0000, WEAK_NSM },
216 { 0x0A41, 0x0A4D, WEAK_NSM },
217 { 0x0A70, 0x0A71, WEAK_NSM },
218 { 0x0A81, 0x0A82, WEAK_NSM },
219 { 0x0ABC, 0x0000, WEAK_NSM },
220 { 0x0AC1, 0x0AC8, WEAK_NSM },
221 { 0x0ACD, 0x0000, WEAK_NSM },
222 { 0x0B01, 0x0000, WEAK_NSM },
223 { 0x0B3C, 0x0000, WEAK_NSM },
224 { 0x0B3F, 0x0000, WEAK_NSM },
225 { 0x0B41, 0x0B43, WEAK_NSM },
226 { 0x0B4D, 0x0B56, WEAK_NSM },
227 { 0x0B82, 0x0000, WEAK_NSM },
228 { 0x0BC0, 0x0000, WEAK_NSM },
229 { 0x0BCD, 0x0000, WEAK_NSM },
230 { 0x0C3E, 0x0C40, WEAK_NSM },
231 { 0x0C46, 0x0C56, WEAK_NSM },
232 { 0x0CBF, 0x0000, WEAK_NSM },
233 { 0x0CC6, 0x0000, WEAK_NSM },
234 { 0x0CCC, 0x0CCD, WEAK_NSM },
235 { 0x0D41, 0x0D43, WEAK_NSM },
236 { 0x0D4D, 0x0000, WEAK_NSM },
237 { 0x0DCA, 0x0000, WEAK_NSM },
238 { 0x0DD2, 0x0DD6, WEAK_NSM },
239 { 0x0E31, 0x0000, WEAK_NSM },
240 { 0x0E34, 0x0E3A, WEAK_NSM },
241 { 0x0E3F, 0x0000, WEAK_ET },
242 { 0x0E47, 0x0E4E, WEAK_NSM },
243 { 0x0EB1, 0x0000, WEAK_NSM },
244 { 0x0EB4, 0x0EBC, WEAK_NSM },
245 { 0x0EC8, 0x0ECD, WEAK_NSM },
246 { 0x0F18, 0x0F19, WEAK_NSM },
247 { 0x0F35, 0x0000, WEAK_NSM },
248 { 0x0F37, 0x0000, WEAK_NSM },
249 { 0x0F39, 0x0000, WEAK_NSM },
250 { 0x0F3A, 0x0F3D, NEUTRAL_ON },
251 { 0x0F71, 0x0F7E, WEAK_NSM },
252 { 0x0F80, 0x0F84, WEAK_NSM },
253 { 0x0F86, 0x0F87, WEAK_NSM },
254 { 0x0F90, 0x0FBC, WEAK_NSM },
255 { 0x0FC6, 0x0000, WEAK_NSM },
256 { 0x102D, 0x1030, WEAK_NSM },
257 { 0x1032, 0x1037, WEAK_NSM },
258 { 0x1039, 0x0000, WEAK_NSM },
259 { 0x1058, 0x1059, WEAK_NSM },
260 { 0x1680, 0x0000, NEUTRAL_WS },
261 { 0x169B, 0x169C, NEUTRAL_ON },
262 { 0x1712, 0x1714, WEAK_NSM },
263 { 0x1732, 0x1734, WEAK_NSM },
264 { 0x1752, 0x1753, WEAK_NSM },
265 { 0x1772, 0x1773, WEAK_NSM },
266 { 0x17B7, 0x17BD, WEAK_NSM },
267 { 0x17C6, 0x0000, WEAK_NSM },
268 { 0x17C9, 0x17D3, WEAK_NSM },
269 { 0x17DB, 0x0000, WEAK_ET },
270 { 0x1800, 0x180A, NEUTRAL_ON },
271 { 0x180B, 0x180D, WEAK_NSM },
272 { 0x180E, 0x0000, WEAK_BN },
273 { 0x18A9, 0x0000, WEAK_NSM },
274 { 0x1FBD, 0x0000, NEUTRAL_ON },
275 { 0x1FBF, 0x1FC1, NEUTRAL_ON },
276 { 0x1FCD, 0x1FCF, NEUTRAL_ON },
277 { 0x1FDD, 0x1FDF, NEUTRAL_ON },
278 { 0x1FED, 0x1FEF, NEUTRAL_ON },
279 { 0x1FFD, 0x1FFE, NEUTRAL_ON },
280 { 0x2000, 0x200A, NEUTRAL_WS },
281 { 0x200B, 0x200D, WEAK_BN },
282 { 0x200F, 0x0000, STRONG_R },
283 { 0x2010, 0x2027, NEUTRAL_ON },
284 { 0x2028, 0x0000, NEUTRAL_WS },
285 { 0x2029, 0x0000, NEUTRAL_B },
286 { 0x202A, 0x0000, LRE },
287 { 0x202B, 0x0000, RLE },
288 { 0x202C, 0x0000, PDF },
289 { 0x202D, 0x0000, LRO },
290 { 0x202E, 0x0000, RLO },
291 { 0x202F, 0x0000, NEUTRAL_WS },
292 { 0x2030, 0x2034, WEAK_ET },
293 { 0x2035, 0x2057, NEUTRAL_ON },
294 { 0x205F, 0x0000, NEUTRAL_WS },
295 { 0x2060, 0x206F, WEAK_BN },
296 { 0x2070, 0x0000, WEAK_EN },
297 { 0x2074, 0x2079, WEAK_EN },
298 { 0x207A, 0x207B, WEAK_ET },
299 { 0x207C, 0x207E, NEUTRAL_ON },
300 { 0x2080, 0x2089, WEAK_EN },
301 { 0x208A, 0x208B, WEAK_ET },
302 { 0x208C, 0x208E, NEUTRAL_ON },
303 { 0x20A0, 0x20B1, WEAK_ET },
304 { 0x20D0, 0x20EA, WEAK_NSM },
305 { 0x2100, 0x2101, NEUTRAL_ON },
306 { 0x2103, 0x2106, NEUTRAL_ON },
307 { 0x2108, 0x2109, NEUTRAL_ON },
308 { 0x2114, 0x0000, NEUTRAL_ON },
309 { 0x2116, 0x2118, NEUTRAL_ON },
310 { 0x211E, 0x2123, NEUTRAL_ON },
311 { 0x2125, 0x0000, NEUTRAL_ON },
312 { 0x2127, 0x0000, NEUTRAL_ON },
313 { 0x2129, 0x0000, NEUTRAL_ON },
314 { 0x212E, 0x0000, WEAK_ET },
315 { 0x2132, 0x0000, NEUTRAL_ON },
316 { 0x213A, 0x0000, NEUTRAL_ON },
317 { 0x2140, 0x2144, NEUTRAL_ON },
318 { 0x214A, 0x215F, NEUTRAL_ON },
319 { 0x2190, 0x2211, NEUTRAL_ON },
320 { 0x2212, 0x2213, WEAK_ET },
321 { 0x2214, 0x2335, NEUTRAL_ON },
322 { 0x237B, 0x2394, NEUTRAL_ON },
323 { 0x2396, 0x244A, NEUTRAL_ON },
324 { 0x2460, 0x249B, WEAK_EN },
325 { 0x24EA, 0x0000, WEAK_EN },
326 { 0x24EB, 0x2FFB, NEUTRAL_ON },
327 { 0x3000, 0x0000, NEUTRAL_WS },
328 { 0x3001, 0x3004, NEUTRAL_ON },
329 { 0x3008, 0x3020, NEUTRAL_ON },
330 { 0x302A, 0x302F, WEAK_NSM },
331 { 0x3030, 0x0000, NEUTRAL_ON },
332 { 0x3036, 0x3037, NEUTRAL_ON },
333 { 0x303D, 0x303F, NEUTRAL_ON },
334 { 0x3099, 0x309A, WEAK_NSM },
335 { 0x309B, 0x309C, NEUTRAL_ON },
336 { 0x30A0, 0x0000, NEUTRAL_ON },
337 { 0x30FB, 0x0000, NEUTRAL_ON },
338 { 0x3251, 0x325F, NEUTRAL_ON },
339 { 0x32B1, 0x32BF, NEUTRAL_ON },
340 { 0xA490, 0xA4C6, NEUTRAL_ON },
341 { 0xFB1D, 0x0000, STRONG_R },
342 { 0xFB1E, 0x0000, WEAK_NSM },
343 { 0xFB1F, 0xFB28, STRONG_R },
344 { 0xFB29, 0x0000, WEAK_ET },
345 { 0xFB2A, 0xFB4F, STRONG_R },
346 { 0xFB50, 0xFD3D, STRONG_AL },
347 { 0xFD3E, 0xFD3F, NEUTRAL_ON },
348 { 0xFD50, 0xFDFC, STRONG_AL },
349 { 0xFE00, 0xFE23, WEAK_NSM },
350 { 0xFE30, 0xFE4F, NEUTRAL_ON },
351 { 0xFE50, 0x0000, WEAK_CS },
352 { 0xFE51, 0x0000, NEUTRAL_ON },
353 { 0xFE52, 0x0000, WEAK_CS },
354 { 0xFE54, 0x0000, NEUTRAL_ON },
355 { 0xFE55, 0x0000, WEAK_CS },
356 { 0xFE56, 0xFE5E, NEUTRAL_ON },
357 { 0xFE5F, 0x0000, WEAK_ET },
358 { 0xFE60, 0xFE61, NEUTRAL_ON },
359 { 0xFE62, 0xFE63, WEAK_ET },
360 { 0xFE64, 0xFE68, NEUTRAL_ON },
361 { 0xFE69, 0xFE6A, WEAK_ET },
362 { 0xFE6B, 0x0000, NEUTRAL_ON },
363 { 0xFE70, 0xFEFC, STRONG_AL },
364 { 0xFEFF, 0x0000, WEAK_BN },
365 { 0xFF01, 0xFF02, NEUTRAL_ON },
366 { 0xFF03, 0xFF05, WEAK_ET },
367 { 0xFF06, 0xFF0A, NEUTRAL_ON },
368 { 0xFF0B, 0x0000, WEAK_ET },
369 { 0xFF0C, 0x0000, WEAK_CS },
370 { 0xFF0D, 0x0000, WEAK_ET },
371 { 0xFF0E, 0x0000, WEAK_CS },
372 { 0xFF0F, 0x0000, WEAK_ES },
373 { 0xFF10, 0xFF19, WEAK_EN },
374 { 0xFF1A, 0x0000, WEAK_CS },
375 { 0xFF1B, 0xFF20, NEUTRAL_ON },
376 { 0xFF3B, 0xFF40, NEUTRAL_ON },
377 { 0xFF5B, 0xFF65, NEUTRAL_ON },
378 { 0xFFE0, 0xFFE1, WEAK_ET },
379 { 0xFFE2, 0xFFE4, NEUTRAL_ON },
380 { 0xFFE5, 0xFFE6, WEAK_ET },
381 { 0xFFE8, 0xFFEE, NEUTRAL_ON },
382 { 0xFFF9, 0xFFFB, WEAK_BN },
383 { 0xFFFC, 0xFFFD, NEUTRAL_ON },
384 { 0x1D167, 0x1D169, WEAK_NSM },
385 { 0x1D173, 0x1D17A, WEAK_BN },
386 { 0x1D17B, 0x1D182, WEAK_NSM },
387 { 0x1D185, 0x1D18B, WEAK_NSM },
388 { 0x1D1AA, 0x1D1AD, WEAK_NSM },
389 { 0x1D7CE, 0x1D7FF, WEAK_EN },
390 { 0xE0001, 0xE007F, WEAK_BN } };
391 int i;
392
393 bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L));
394 staticpro (&bidi_type_table);
395
396 for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++)
397 char_table_set_range (bidi_type_table, bidi_type[i].from,
398 bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
399 make_number (bidi_type[i].type));
400 bidi_initialized = 1;
401 }
402
403 static int
404 bidi_is_arabic_number (int ch)
405 {
406 return 0; /* FIXME! */
407 }
408
409 /* Return the bidi type of a character CH. */
410 bidi_type_t
411 bidi_get_type (int ch)
412 {
413 if (ch == BIDI_EOB)
414 return NEUTRAL_B;
415 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
416 }
417
418 void
419 bidi_check_type (bidi_type_t type)
420 {
421 if (type < UNKNOWN_BT || type > NEUTRAL_ON)
422 abort ();
423 }
424
425 /* Given a bidi TYPE of a character, return its category. */
426 bidi_category_t
427 bidi_get_category (bidi_type_t type)
428 {
429 switch (type)
430 {
431 case UNKNOWN_BT:
432 return UNKNOWN_BC;
433 case STRONG_L:
434 case STRONG_R:
435 case STRONG_AL:
436 case LRE:
437 case LRO:
438 case RLE:
439 case RLO:
440 return STRONG;
441 case PDF: /* ??? really?? */
442 case WEAK_EN:
443 case WEAK_ES:
444 case WEAK_ET:
445 case WEAK_AN:
446 case WEAK_CS:
447 case WEAK_NSM:
448 case WEAK_BN:
449 return WEAK;
450 case NEUTRAL_B:
451 case NEUTRAL_S:
452 case NEUTRAL_WS:
453 case NEUTRAL_ON:
454 return NEUTRAL;
455 default:
456 abort ();
457 }
458 }
459
460 /* FIXME: exceedingly temporary! Should consult the Unicode database
461 of character properties. */
462 int
463 bidi_mirror_char (int c)
464 {
465 static const char mirrored_pairs[] = "()<>[]{}";
466 const char *p = strchr (mirrored_pairs, c);
467
468 if (p)
469 {
470 size_t i = p - mirrored_pairs;
471
472 if ((i & 1) == 0)
473 return mirrored_pairs[i + 1];
474 else
475 return mirrored_pairs[i - 1];
476 }
477 return c;
478 }
479
480 /* Copy the bidi iterator from FROM to TO. To save cycles, this only
481 copies the part of the level stack that is actually in use. */
482 static inline void
483 bidi_copy_it (struct bidi_it *to, struct bidi_it *from)
484 {
485 int i;
486
487 /* Copy everything except the level stack. */
488 memcpy (to, from, ((int)&((struct bidi_it *)0)->level_stack[0]));
489
490 /* Copy the active part of the level stack. */
491 to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */
492 for (i = 1; i <= from->stack_idx; i++)
493 to->level_stack[i] = from->level_stack[i];
494 }
495
496 /* Caching the bidi iterator states. */
497
498 static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! */
499 static int bidi_cache_idx;
500 static int bidi_cache_last_idx;
501
502 static inline void
503 bidi_cache_reset (void)
504 {
505 bidi_cache_idx = 0;
506 bidi_cache_last_idx = -1;
507 }
508
509 static inline void
510 bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it)
511 {
512 int current_scan_dir = bidi_it->scan_dir;
513
514 if (idx < 0 || idx >= bidi_cache_idx)
515 abort ();
516
517 bidi_copy_it (bidi_it, &bidi_cache[idx]);
518 bidi_it->scan_dir = current_scan_dir;
519 bidi_cache_last_idx = idx;
520 }
521
522 /* Find a cached state with a given CHARPOS and resolved embedding
523 level less or equal to LEVEL. if LEVEL is -1, disregard the
524 resolved levels in cached states. DIR, if non-zero, means search
525 in that direction from the last cache hit. */
526 static inline int
527 bidi_cache_search (int charpos, int level, int dir)
528 {
529 int i, i_start;
530
531 if (bidi_cache_idx)
532 {
533 if (charpos < bidi_cache[bidi_cache_last_idx].charpos)
534 dir = -1;
535 else if (charpos > bidi_cache[bidi_cache_last_idx].charpos)
536 dir = 1;
537 if (dir)
538 i_start = bidi_cache_last_idx;
539 else
540 {
541 dir = -1;
542 i_start = bidi_cache_idx - 1;
543 }
544
545 if (dir < 0)
546 {
547 /* Linear search for now; FIXME! */
548 for (i = i_start; i >= 0; i--)
549 if (bidi_cache[i].charpos == charpos
550 && (level == -1 || bidi_cache[i].resolved_level <= level))
551 return i;
552 }
553 else
554 {
555 for (i = i_start; i < bidi_cache_idx; i++)
556 if (bidi_cache[i].charpos == charpos
557 && (level == -1 || bidi_cache[i].resolved_level <= level))
558 return i;
559 }
560 }
561
562 return -1;
563 }
564
565 /* Find a cached state where the resolved level changes to a value
566 that is lower than LEVEL, and return its cache slot index. DIR is
567 the direction to search, starting with the last used cache slot.
568 BEFORE, if non-zero, means return the index of the slot that is
569 ``before'' the level change in the search direction. That is,
570 given the cached levels like this:
571
572 1122333442211
573 AB C
574
575 and assuming we are at the position cached at the slot marked with
576 C, searching backwards (DIR = -1) for LEVEL = 2 will return the
577 index of slot B or A, depending whether BEFORE is, respectively,
578 non-zero or zero. */
579 static int
580 bidi_cache_find_level_change (int level, int dir, int before)
581 {
582 if (bidi_cache_idx)
583 {
584 int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1;
585 int incr = before ? 1 : 0;
586
587 if (!dir)
588 dir = -1;
589 else if (!incr)
590 i += dir;
591
592 if (dir < 0)
593 {
594 while (i >= incr)
595 {
596 if (bidi_cache[i - incr].resolved_level >= 0
597 && bidi_cache[i - incr].resolved_level < level)
598 return i;
599 i--;
600 }
601 }
602 else
603 {
604 while (i < bidi_cache_idx - incr)
605 {
606 if (bidi_cache[i + incr].resolved_level >= 0
607 && bidi_cache[i + incr].resolved_level < level)
608 return i;
609 i++;
610 }
611 }
612 }
613
614 return -1;
615 }
616
617 static inline void
618 bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved)
619 {
620 int idx;
621
622 /* We should never cache on backward scans. */
623 if (bidi_it->scan_dir == -1)
624 abort ();
625 idx = bidi_cache_search (bidi_it->charpos, -1, 1);
626
627 if (idx < 0)
628 {
629 idx = bidi_cache_idx;
630 if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1)
631 abort ();
632 bidi_copy_it (&bidi_cache[idx], bidi_it);
633 if (!resolved)
634 bidi_cache[idx].resolved_level = -1;
635 }
636 else
637 {
638 /* Copy only the members which could have changed, to avoid
639 costly copying of the entire struct. */
640 bidi_cache[idx].type = bidi_it->type;
641 bidi_check_type (bidi_it->type);
642 bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1;
643 bidi_check_type (bidi_it->type_after_w1);
644 if (resolved)
645 bidi_cache[idx].resolved_level = bidi_it->resolved_level;
646 else
647 bidi_cache[idx].resolved_level = -1;
648 bidi_cache[idx].invalid_levels = bidi_it->invalid_levels;
649 bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels;
650 bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral;
651 bidi_cache[idx].next_for_ws = bidi_it->next_for_ws;
652 bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit;
653 }
654
655 bidi_cache_last_idx = idx;
656 if (idx >= bidi_cache_idx)
657 bidi_cache_idx = idx + 1;
658 }
659
660 static inline bidi_type_t
661 bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it)
662 {
663 int i = bidi_cache_search (charpos, level, bidi_it->scan_dir);
664
665 if (i >= 0)
666 {
667 bidi_dir_t current_scan_dir = bidi_it->scan_dir;
668
669 *bidi_it = bidi_cache[i];
670 bidi_cache_last_idx = i;
671 /* Don't let scan direction from from the cached state override
672 the current scan direction. */
673 bidi_it->scan_dir = current_scan_dir;
674 return bidi_it->type;
675 }
676
677 return UNKNOWN_BT;
678 }
679
680 static inline int
681 bidi_peek_at_next_level (struct bidi_it *bidi_it)
682 {
683 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
684 abort ();
685 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
686 }
687
688 /* Return non-zero if buffer's byte position POS is the last character
689 of a paragraph. THIS_CH is the character preceding the one at POS in
690 the buffer. */
691 int
692 bidi_at_paragraph_end (int this_ch, int pos)
693 {
694 int next_ch;
695
696 if (pos >= ZV_BYTE)
697 return 1;
698
699 next_ch = FETCH_CHAR (pos);
700 /* FIXME: This should support all Unicode characters that can end a
701 paragraph. */
702 return (this_ch == '\n' && next_ch == '\n');
703 }
704
705 /* Determine the start-of-run (sor) directional type given the two
706 embedding levels on either side of the run boundary. Also, update
707 the saved info about previously seen characters, since that info is
708 generally valid for a single level run. */
709 static inline void
710 bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after)
711 {
712 int higher_level = level_before > level_after ? level_before : level_after;
713
714 /* The prev_was_pdf gork is required for when we have several PDFs
715 in a row. In that case, we want to compute the sor type for the
716 next level run only once: when we see the first PDF. That's
717 because the sor type depends only on the higher of the two levels
718 that we find on the two sides of the level boundary (see UAX#9,
719 clause X10), and so we don't need to know the final embedding
720 level to which we descend after processing all the PDFs. */
721 if (level_before < level_after || !bidi_it->prev_was_pdf)
722 /* FIXME: should the default sor direction be user selectable? */
723 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
724 if (level_before > level_after)
725 bidi_it->prev_was_pdf = 1;
726
727 bidi_it->prev.type = UNKNOWN_BT;
728 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
729 bidi_it->last_strong.orig_type = UNKNOWN_BT;
730 bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? STRONG_R : STRONG_L;
731 bidi_it->prev_for_neutral.charpos = bidi_it->charpos;
732 bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos;
733 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
734 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
735 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
736 }
737
738 void
739 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
740 {
741 int pos = bidi_it->charpos, bytepos = bidi_it->bytepos;
742 int ch;
743
744 /* We should never be called at EOB. */
745 if (bytepos >= ZV_BYTE)
746 abort ();
747
748 ch = FETCH_CHAR (bytepos);
749 bidi_it->ch_len = CHAR_BYTES (ch);
750 bidi_it->level_stack[0].level = 0; /* default for L2R */
751 if (dir == R2L)
752 bidi_it->level_stack[0].level = 1;
753 else if (dir == NEUTRAL_DIR) /* P2 */
754 {
755 bidi_type_t type;
756
757 /* FIXME: should actually go to where the paragraph begins and
758 start the loop below from there, since UAX#9 says to find the
759 first strong directional character in the paragraph. */
760
761 for (type = bidi_get_type (ch), pos++, bytepos += bidi_it->ch_len;
762 /* NOTE: UAX#9 says to search only for L, AL, or R types of
763 characters, and ignore RLE, RLO, LRE, and LRO. However,
764 I'm not sure it makes sense to omit those 4; should try
765 with and without that to see the effect. */
766 (bidi_get_category (type) != STRONG)
767 || (bidi_ignore_explicit_marks_for_paragraph_level
768 && (type == RLE || type == RLO
769 || type == LRE || type == LRO));
770 type = bidi_get_type (ch))
771 {
772 if (type == NEUTRAL_B || bidi_at_paragraph_end (ch, bytepos))
773 break;
774 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
775 }
776 if (type == STRONG_R || type == STRONG_AL) /* P3 */
777 bidi_it->level_stack[0].level = 1;
778 }
779 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
780 bidi_it->resolved_level = bidi_it->level_stack[0].level;
781 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
782 bidi_it->invalid_levels = 0;
783 bidi_it->invalid_rl_levels = -1;
784 bidi_it->new_paragraph = 0;
785 bidi_it->next_en_pos = -1;
786 bidi_it->next_for_ws.type = UNKNOWN_BT;
787 bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */
788
789 bidi_cache_reset ();
790 }
791
792 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end,
793 and set the new paragraph flag in the iterator. */
794 static inline void
795 bidi_set_paragraph_end (struct bidi_it *bidi_it)
796 {
797 bidi_it->invalid_levels = 0;
798 bidi_it->invalid_rl_levels = -1;
799 bidi_it->stack_idx = 0;
800 bidi_it->resolved_level = bidi_it->level_stack[0].level;
801 bidi_it->new_paragraph = 1;
802 }
803
804 /* Initialize the bidi iterator from buffer position CHARPOS. */
805 void
806 bidi_init_it (int charpos, int bytepos, struct bidi_it *bidi_it)
807 {
808 if (! bidi_initialized)
809 bidi_initialize ();
810 bidi_set_paragraph_end (bidi_it);
811 bidi_it->charpos = charpos;
812 bidi_it->bytepos = bytepos;
813 bidi_it->type = NEUTRAL_B;
814 bidi_it->type_after_w1 = UNKNOWN_BT;
815 bidi_it->orig_type = UNKNOWN_BT;
816 bidi_it->prev_was_pdf = 0;
817 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
818 bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 =
819 bidi_it->last_strong.orig_type = UNKNOWN_BT;
820 bidi_it->next_for_neutral.charpos = -1;
821 bidi_it->next_for_neutral.type =
822 bidi_it->next_for_neutral.type_after_w1 =
823 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
824 bidi_it->prev_for_neutral.charpos = -1;
825 bidi_it->prev_for_neutral.type =
826 bidi_it->prev_for_neutral.type_after_w1 =
827 bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT;
828 bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */
829 }
830
831 /* Push the current embedding level and override status; reset the
832 current level to LEVEL and the current override status to OVERRIDE. */
833 static inline void
834 bidi_push_embedding_level (struct bidi_it *bidi_it,
835 int level, bidi_dir_t override)
836 {
837 bidi_it->stack_idx++;
838 if (bidi_it->stack_idx >= BIDI_MAXLEVEL)
839 abort ();
840 bidi_it->level_stack[bidi_it->stack_idx].level = level;
841 bidi_it->level_stack[bidi_it->stack_idx].override = override;
842 }
843
844 /* Pop the embedding level and directional override status from the
845 stack, and return the new level. */
846 static inline int
847 bidi_pop_embedding_level (struct bidi_it *bidi_it)
848 {
849 /* UAX#9 says to ignore invalid PDFs. */
850 if (bidi_it->stack_idx > 0)
851 bidi_it->stack_idx--;
852 return bidi_it->level_stack[bidi_it->stack_idx].level;
853 }
854
855 /* Record in SAVED_INFO the information about the current character. */
856 static inline void
857 bidi_remember_char (struct bidi_saved_info *saved_info,
858 struct bidi_it *bidi_it)
859 {
860 saved_info->charpos = bidi_it->charpos;
861 saved_info->bytepos = bidi_it->bytepos;
862 saved_info->type = bidi_it->type;
863 bidi_check_type (bidi_it->type);
864 saved_info->type_after_w1 = bidi_it->type_after_w1;
865 bidi_check_type (bidi_it->type_after_w1);
866 saved_info->orig_type = bidi_it->orig_type;
867 bidi_check_type (bidi_it->orig_type);
868 }
869
870 /* Resolve the type of a neutral character according to the type of
871 surrounding strong text and the current embedding level. */
872 static inline bidi_type_t
873 bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev)
874 {
875 /* N1: European and Arabic numbers are treated as though they were R. */
876 if (next_type == WEAK_EN || next_type == WEAK_AN)
877 next_type = STRONG_R;
878 if (prev_type == WEAK_EN || prev_type == WEAK_AN)
879 prev_type = STRONG_R;
880
881 if (next_type == prev_type) /* N1 */
882 return next_type;
883 else if ((lev & 1) == 0) /* N2 */
884 return STRONG_L;
885 else
886 return STRONG_R;
887 }
888
889 static inline int
890 bidi_explicit_dir_char (int c)
891 {
892 /* FIXME: this should be replaced with a lookup table with suitable
893 bits set, like standard C ctype macros do. */
894 return (c == LRE_CHAR || c == LRO_CHAR
895 || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR);
896 }
897
898 /* A helper function for bidi_resolve_explicit. It advances to the
899 next character in logical order and determines the new embedding
900 level and directional override, but does not take into account
901 empty embeddings. */
902 static int
903 bidi_resolve_explicit_1 (struct bidi_it *bidi_it)
904 {
905 int curchar;
906 bidi_type_t type;
907 int current_level;
908 int new_level;
909 bidi_dir_t override;
910
911 if (bidi_it->bytepos < BEGV_BYTE) /* after reseat to BEGV */
912 {
913 bidi_it->charpos = BEGV;
914 bidi_it->bytepos = BEGV_BYTE;
915 }
916 else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
917 {
918 bidi_it->charpos++;
919 if (bidi_it->ch_len == 0)
920 abort ();
921 bidi_it->bytepos += bidi_it->ch_len;
922 }
923
924 current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */
925 override = bidi_it->level_stack[bidi_it->stack_idx].override;
926 new_level = current_level;
927
928 /* in case it is a unibyte character (not yet implemented) */
929 /* _fetch_multibyte_char_len = 1; */
930 if (bidi_it->bytepos >= ZV_BYTE)
931 {
932 curchar = BIDI_EOB;
933 bidi_it->ch_len = 1;
934 }
935 else
936 {
937 curchar = FETCH_CHAR (bidi_it->bytepos);
938 bidi_it->ch_len = CHAR_BYTES (curchar);
939 }
940 bidi_it->ch = curchar;
941
942 type = bidi_get_type (curchar);
943 bidi_it->orig_type = type;
944 bidi_check_type (bidi_it->orig_type);
945
946 if (type != PDF)
947 bidi_it->prev_was_pdf = 0;
948
949 bidi_it->type_after_w1 = UNKNOWN_BT;
950
951 switch (type)
952 {
953 case RLE: /* X2 */
954 case RLO: /* X4 */
955 bidi_it->type_after_w1 = type;
956 bidi_check_type (bidi_it->type_after_w1);
957 type = WEAK_BN; /* X9/Retaining */
958 if (bidi_it->ignore_bn_limit <= 0)
959 {
960 if (current_level <= BIDI_MAXLEVEL - 4)
961 {
962 /* Compute the least odd embedding level greater than
963 the current level. */
964 new_level = ((current_level + 1) & ~1) + 1;
965 if (bidi_it->type_after_w1 == RLE)
966 override = NEUTRAL_DIR;
967 else
968 override = R2L;
969 if (current_level == BIDI_MAXLEVEL - 4)
970 bidi_it->invalid_rl_levels = 0;
971 bidi_push_embedding_level (bidi_it, new_level, override);
972 }
973 else
974 {
975 bidi_it->invalid_levels++;
976 /* See the commentary about invalid_rl_levels below. */
977 if (bidi_it->invalid_rl_levels < 0)
978 bidi_it->invalid_rl_levels = 0;
979 bidi_it->invalid_rl_levels++;
980 }
981 }
982 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
983 || bidi_it->next_en_pos > bidi_it->charpos)
984 type = WEAK_EN;
985 break;
986 case LRE: /* X3 */
987 case LRO: /* X5 */
988 bidi_it->type_after_w1 = type;
989 bidi_check_type (bidi_it->type_after_w1);
990 type = WEAK_BN; /* X9/Retaining */
991 if (bidi_it->ignore_bn_limit <= 0)
992 {
993 if (current_level <= BIDI_MAXLEVEL - 5)
994 {
995 /* Compute the least even embedding level greater than
996 the current level. */
997 new_level = ((current_level + 2) & ~1);
998 if (bidi_it->type_after_w1 == LRE)
999 override = NEUTRAL_DIR;
1000 else
1001 override = L2R;
1002 bidi_push_embedding_level (bidi_it, new_level, override);
1003 }
1004 else
1005 {
1006 bidi_it->invalid_levels++;
1007 /* invalid_rl_levels counts invalid levels encountered
1008 while the embedding level was already too high for
1009 LRE/LRO, but not for RLE/RLO. That is because
1010 there may be exactly one PDF which we should not
1011 ignore even though invalid_levels is non-zero.
1012 invalid_rl_levels helps to know what PDF is
1013 that. */
1014 if (bidi_it->invalid_rl_levels >= 0)
1015 bidi_it->invalid_rl_levels++;
1016 }
1017 }
1018 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1019 || bidi_it->next_en_pos > bidi_it->charpos)
1020 type = WEAK_EN;
1021 break;
1022 case PDF: /* X7 */
1023 bidi_it->type_after_w1 = type;
1024 bidi_check_type (bidi_it->type_after_w1);
1025 type = WEAK_BN; /* X9/Retaining */
1026 if (bidi_it->ignore_bn_limit <= 0)
1027 {
1028 if (!bidi_it->invalid_rl_levels)
1029 {
1030 new_level = bidi_pop_embedding_level (bidi_it);
1031 bidi_it->invalid_rl_levels = -1;
1032 if (bidi_it->invalid_levels)
1033 bidi_it->invalid_levels--;
1034 /* else nothing: UAX#9 says to ignore invalid PDFs */
1035 }
1036 if (!bidi_it->invalid_levels)
1037 new_level = bidi_pop_embedding_level (bidi_it);
1038 else
1039 {
1040 bidi_it->invalid_levels--;
1041 bidi_it->invalid_rl_levels--;
1042 }
1043 }
1044 else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */
1045 || bidi_it->next_en_pos > bidi_it->charpos)
1046 type = WEAK_EN;
1047 break;
1048 default:
1049 /* Nothing. */
1050 break;
1051 }
1052
1053 bidi_it->type = type;
1054 bidi_check_type (bidi_it->type);
1055
1056 return new_level;
1057 }
1058
1059 /* Given an iterator state in BIDI_IT, advance one character position
1060 in the buffer to the next character (in the logical order), resolve
1061 any explicit embeddings and directional overrides, and return the
1062 embedding level of the character after resolving explicit
1063 directives and ignoring empty embeddings. */
1064 static int
1065 bidi_resolve_explicit (struct bidi_it *bidi_it)
1066 {
1067 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1068 int new_level = bidi_resolve_explicit_1 (bidi_it);
1069
1070 if (prev_level < new_level
1071 && bidi_it->type == WEAK_BN
1072 && bidi_it->ignore_bn_limit == 0 /* only if not already known */
1073 && bidi_it->ch != BIDI_EOB /* not already at EOB */
1074 && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1075 + bidi_it->ch_len)))
1076 {
1077 /* Avoid pushing and popping embedding levels if the level run
1078 is empty, as this breaks level runs where it shouldn't.
1079 UAX#9 removes all the explicit embedding and override codes,
1080 so empty embeddings disappear without a trace. We need to
1081 behave as if we did the same. */
1082 struct bidi_it saved_it;
1083 int level = prev_level;
1084
1085 bidi_copy_it (&saved_it, bidi_it);
1086
1087 while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
1088 + bidi_it->ch_len)))
1089 {
1090 level = bidi_resolve_explicit_1 (bidi_it);
1091 }
1092
1093 if (level == prev_level) /* empty embedding */
1094 saved_it.ignore_bn_limit = bidi_it->charpos + 1;
1095 else /* this embedding is non-empty */
1096 saved_it.ignore_bn_limit = -1;
1097
1098 bidi_copy_it (bidi_it, &saved_it);
1099 if (bidi_it->ignore_bn_limit > 0)
1100 {
1101 /* We pushed a level, but we shouldn't have. Undo that. */
1102 if (!bidi_it->invalid_rl_levels)
1103 {
1104 new_level = bidi_pop_embedding_level (bidi_it);
1105 bidi_it->invalid_rl_levels = -1;
1106 if (bidi_it->invalid_levels)
1107 bidi_it->invalid_levels--;
1108 }
1109 if (!bidi_it->invalid_levels)
1110 new_level = bidi_pop_embedding_level (bidi_it);
1111 else
1112 {
1113 bidi_it->invalid_levels--;
1114 bidi_it->invalid_rl_levels--;
1115 }
1116 }
1117 }
1118
1119 /* For when the paragraph end is defined by anything other than a
1120 special Unicode character (a.k.a. ``higher protocols''). */
1121 if (bidi_it->type != NEUTRAL_B)
1122 if (bidi_at_paragraph_end (bidi_it->ch,
1123 bidi_it->bytepos + bidi_it->ch_len))
1124 bidi_it->type = NEUTRAL_B;
1125
1126 if (bidi_it->type == NEUTRAL_B) /* X8 */
1127 {
1128 bidi_set_paragraph_end (bidi_it);
1129 bidi_it->type_after_w1 = bidi_it->type; /* needed below and in L1 */
1130 bidi_check_type (bidi_it->type_after_w1);
1131 }
1132
1133 return new_level;
1134 }
1135
1136 /* Advance in the buffer, resolve weak types and return the type of
1137 the next character after weak type resolution. */
1138 bidi_type_t
1139 bidi_resolve_weak (struct bidi_it *bidi_it)
1140 {
1141 bidi_type_t type;
1142 bidi_dir_t override;
1143 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1144 int new_level = bidi_resolve_explicit (bidi_it);
1145 int next_char;
1146 bidi_type_t type_of_next;
1147 struct bidi_it saved_it;
1148
1149 type = bidi_it->type;
1150 override = bidi_it->level_stack[bidi_it->stack_idx].override;
1151
1152 if (type == UNKNOWN_BT
1153 || type == LRE
1154 || type == LRO
1155 || type == RLE
1156 || type == RLO
1157 || type == PDF)
1158 abort ();
1159
1160 if (new_level != prev_level
1161 || bidi_it->type == NEUTRAL_B)
1162 {
1163 /* We've got a new embedding level run, compute the directional
1164 type of sor and initialize per-run variables (UAX#9, clause
1165 X10). */
1166 bidi_set_sor_type (bidi_it, prev_level, new_level);
1167 }
1168 else if (type == NEUTRAL_S || type == NEUTRAL_WS
1169 || type == WEAK_BN || type == STRONG_AL)
1170 bidi_it->type_after_w1 = type; /* needed in L1 */
1171 bidi_check_type (bidi_it->type_after_w1);
1172
1173 /* Level and directional override status are already recorded in
1174 bidi_it, and do not need any change; see X6. */
1175 if (override == R2L) /* X6 */
1176 type = STRONG_R;
1177 else if (override == L2R)
1178 type = STRONG_L;
1179 else if (type == STRONG_AL)
1180 type = STRONG_R; /* W3 */
1181 else if (type == WEAK_NSM) /* W1 */
1182 {
1183 /* Note that we don't need to consider the case where the prev
1184 character has its type overridden by an RLO or LRO: such
1185 characters are outside the current level run, and thus not
1186 relevant to this NSM. Thus, NSM gets the orig_type of the
1187 previous character. */
1188 if (bidi_it->prev.type != UNKNOWN_BT)
1189 type = bidi_it->prev.orig_type;
1190 else if (bidi_it->sor == R2L)
1191 type = STRONG_R;
1192 else if (bidi_it->sor == L2R)
1193 type = STRONG_L;
1194 else /* shouldn't happen! */
1195 abort ();
1196 if (type == WEAK_EN /* W2 after W1 */
1197 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1198 type = WEAK_AN;
1199 }
1200 else if (type == WEAK_EN /* W2 */
1201 && bidi_it->last_strong.type_after_w1 == STRONG_AL)
1202 type = WEAK_AN;
1203 else if ((type == WEAK_ES
1204 && (bidi_it->prev.type_after_w1 == WEAK_EN /* W4 */
1205 && (bidi_it->prev.orig_type == WEAK_EN
1206 || bidi_it->prev.orig_type == WEAK_NSM))) /* aft W1 */
1207 || (type == WEAK_CS
1208 && ((bidi_it->prev.type_after_w1 == WEAK_EN
1209 && (bidi_it->prev.orig_type == WEAK_EN /* W4 */
1210 || bidi_it->prev.orig_type == WEAK_NSM)) /* a/W1 */
1211 || bidi_it->prev.type_after_w1 == WEAK_AN))) /* W4 */
1212 {
1213 next_char =
1214 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1215 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1216 type_of_next = bidi_get_type (next_char);
1217
1218 if (type_of_next == WEAK_BN
1219 || bidi_explicit_dir_char (next_char))
1220 {
1221 bidi_copy_it (&saved_it, bidi_it);
1222 while (bidi_resolve_explicit (bidi_it) == new_level
1223 && bidi_it->type == WEAK_BN)
1224 ;
1225 type_of_next = bidi_it->type;
1226 bidi_copy_it (bidi_it, &saved_it);
1227 }
1228
1229 /* If the next character is EN, but the last strong-type
1230 character is AL, that next EN will be changed to AN when we
1231 process it in W2 above. So in that case, this ES should not
1232 be changed into EN. */
1233 if (type == WEAK_ES
1234 && type_of_next == WEAK_EN
1235 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1236 type = WEAK_EN;
1237 else if (type == WEAK_CS)
1238 {
1239 if (bidi_it->prev.type_after_w1 == WEAK_AN
1240 && (type_of_next == WEAK_AN
1241 /* If the next character is EN, but the last
1242 strong-type character is AL, EN will be later
1243 changed to AN when we process it in W2 above. So
1244 in that case, this ES should not be changed into
1245 EN. */
1246 || (type_of_next == WEAK_EN
1247 && bidi_it->last_strong.type_after_w1 == STRONG_AL)))
1248 type = WEAK_AN;
1249 else if (bidi_it->prev.type_after_w1 == WEAK_EN
1250 && type_of_next == WEAK_EN
1251 && bidi_it->last_strong.type_after_w1 != STRONG_AL)
1252 type = WEAK_EN;
1253 }
1254 }
1255 else if (type == WEAK_ET /* W5: ET with EN before or after it */
1256 || type == WEAK_BN) /* W5/Retaining */
1257 {
1258 if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN with EN before it */
1259 || bidi_it->next_en_pos > bidi_it->charpos)
1260 type = WEAK_EN;
1261 /* W5: ET with EN after it. */
1262 else
1263 {
1264 int en_pos = bidi_it->charpos + 1;
1265
1266 next_char =
1267 bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
1268 ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
1269 type_of_next = bidi_get_type (next_char);
1270
1271 if (type_of_next == WEAK_ET
1272 || type_of_next == WEAK_BN
1273 || bidi_explicit_dir_char (next_char))
1274 {
1275 bidi_copy_it (&saved_it, bidi_it);
1276 while (bidi_resolve_explicit (bidi_it) == new_level
1277 && (bidi_it->type == WEAK_BN || bidi_it->type == WEAK_ET))
1278 ;
1279 type_of_next = bidi_it->type;
1280 en_pos = bidi_it->charpos;
1281 bidi_copy_it (bidi_it, &saved_it);
1282 }
1283 if (type_of_next == WEAK_EN)
1284 {
1285 /* If the last strong character is AL, the EN we've
1286 found will become AN when we get to it (W2). */
1287 if (bidi_it->last_strong.type_after_w1 != STRONG_AL)
1288 {
1289 type = WEAK_EN;
1290 /* Remember this EN position, to speed up processing
1291 of the next ETs. */
1292 bidi_it->next_en_pos = en_pos;
1293 }
1294 else if (type == WEAK_BN)
1295 type = NEUTRAL_ON; /* W6/Retaining */
1296 }
1297 }
1298 }
1299
1300 if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */
1301 || (type == WEAK_BN
1302 && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */
1303 || bidi_it->prev.type_after_w1 == WEAK_ES
1304 || bidi_it->prev.type_after_w1 == WEAK_ET)))
1305 type = NEUTRAL_ON;
1306
1307 /* Store the type we've got so far, before we clobber it with strong
1308 types in W7 and while resolving neutral types. But leave alone
1309 the original types that were recorded above, because we will need
1310 them for the L1 clause. */
1311 if (bidi_it->type_after_w1 == UNKNOWN_BT)
1312 bidi_it->type_after_w1 = type;
1313 bidi_check_type (bidi_it->type_after_w1);
1314
1315 if (type == WEAK_EN) /* W7 */
1316 {
1317 if ((bidi_it->last_strong.type_after_w1 == STRONG_L)
1318 || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R))
1319 type = STRONG_L;
1320 }
1321
1322 bidi_it->type = type;
1323 bidi_check_type (bidi_it->type);
1324 return type;
1325 }
1326
1327 bidi_type_t
1328 bidi_resolve_neutral (struct bidi_it *bidi_it)
1329 {
1330 int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1331 bidi_type_t type = bidi_resolve_weak (bidi_it);
1332 int current_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1333
1334 if (!(type == STRONG_R
1335 || type == STRONG_L
1336 || type == WEAK_BN
1337 || type == WEAK_EN
1338 || type == WEAK_AN
1339 || type == NEUTRAL_B
1340 || type == NEUTRAL_S
1341 || type == NEUTRAL_WS
1342 || type == NEUTRAL_ON))
1343 abort ();
1344
1345 if (bidi_get_category (type) == NEUTRAL
1346 || (type == WEAK_BN && prev_level == current_level))
1347 {
1348 if (bidi_it->next_for_neutral.type != UNKNOWN_BT)
1349 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1350 bidi_it->next_for_neutral.type,
1351 current_level);
1352 else
1353 {
1354 /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in
1355 the assumption of batch-style processing; see clauses W4,
1356 W5, and especially N1, which require to look far forward
1357 (as well as back) in the buffer. May the fleas of a
1358 thousand camels infest the armpits of those who design
1359 supposedly general-purpose algorithms by looking at their
1360 own implementations, and fail to consider other possible
1361 implementations! */
1362 struct bidi_it saved_it;
1363 bidi_type_t next_type;
1364
1365 if (bidi_it->scan_dir == -1)
1366 abort ();
1367
1368 bidi_copy_it (&saved_it, bidi_it);
1369 /* Scan the text forward until we find the first non-neutral
1370 character, and then use that to resolve the neutral we
1371 are dealing with now. We also cache the scanned iterator
1372 states, to salvage some of the effort later. */
1373 bidi_cache_iterator_state (bidi_it, 0);
1374 do {
1375 /* Record the info about the previous character, so that
1376 it will be cached below with this state. */
1377 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1378 && bidi_it->type != WEAK_BN)
1379 bidi_remember_char (&bidi_it->prev, bidi_it);
1380 type = bidi_resolve_weak (bidi_it);
1381 /* Paragraph separators have their levels fully resolved
1382 at this point, so cache them as resolved. */
1383 bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B);
1384 /* FIXME: implement L1 here, by testing for a newline and
1385 resetting the level for any sequence of whitespace
1386 characters adjacent to it. */
1387 } while (!(type == NEUTRAL_B
1388 || (type != WEAK_BN
1389 && bidi_get_category (type) != NEUTRAL)
1390 /* This is all per level run, so stop when we
1391 reach the end of this level run. */
1392 || bidi_it->level_stack[bidi_it->stack_idx].level !=
1393 current_level));
1394
1395 bidi_remember_char (&saved_it.next_for_neutral, bidi_it);
1396
1397 switch (type)
1398 {
1399 case STRONG_L:
1400 case STRONG_R:
1401 case STRONG_AL:
1402 next_type = type;
1403 break;
1404 case WEAK_EN:
1405 case WEAK_AN:
1406 /* N1: ``European and Arabic numbers are treated as
1407 though they were R.'' */
1408 next_type = STRONG_R;
1409 saved_it.next_for_neutral.type = STRONG_R;
1410 break;
1411 case WEAK_BN:
1412 if (!bidi_explicit_dir_char (bidi_it->ch))
1413 abort (); /* can't happen: BNs are skipped */
1414 /* FALLTHROUGH */
1415 case NEUTRAL_B:
1416 /* Marched all the way to the end of this level run.
1417 We need to use the eor type, whose information is
1418 stored by bidi_set_sor_type in the prev_for_neutral
1419 member. */
1420 if (saved_it.type != WEAK_BN
1421 || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL)
1422 {
1423 next_type = bidi_it->prev_for_neutral.type;
1424 saved_it.next_for_neutral.type = next_type;
1425 bidi_check_type (next_type);
1426 }
1427 else
1428 {
1429 /* This is a BN which does not adjoin neutrals.
1430 Leave its type alone. */
1431 bidi_copy_it (bidi_it, &saved_it);
1432 return bidi_it->type;
1433 }
1434 break;
1435 default:
1436 abort ();
1437 }
1438 type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type,
1439 next_type, current_level);
1440 saved_it.type = type;
1441 bidi_check_type (type);
1442 bidi_copy_it (bidi_it, &saved_it);
1443 }
1444 }
1445 return type;
1446 }
1447
1448 /* Given an iterator state in BIDI_IT, advance one character position
1449 in the buffer to the next character (in the logical order), resolve
1450 the bidi type of that next character, and return that type. */
1451 bidi_type_t
1452 bidi_type_of_next_char (struct bidi_it *bidi_it)
1453 {
1454 bidi_type_t type;
1455
1456 /* This should always be called during a forward scan. */
1457 if (bidi_it->scan_dir != 1)
1458 abort ();
1459
1460 /* Reset the limit until which to ignore BNs if we step out of the
1461 area where we found only empty levels. */
1462 if ((bidi_it->ignore_bn_limit > 0
1463 && bidi_it->ignore_bn_limit <= bidi_it->charpos)
1464 || (bidi_it->ignore_bn_limit == -1
1465 && !bidi_explicit_dir_char (bidi_it->ch)))
1466 bidi_it->ignore_bn_limit = 0;
1467
1468 type = bidi_resolve_neutral (bidi_it);
1469
1470 return type;
1471 }
1472
1473 /* Given an iterator state BIDI_IT, advance one character position in
1474 the buffer to the next character (in the logical order), resolve
1475 the embedding and implicit levels of that next character, and
1476 return the resulting level. */
1477 int
1478 bidi_level_of_next_char (struct bidi_it *bidi_it)
1479 {
1480 bidi_type_t type;
1481 int level, prev_level = -1;
1482 struct bidi_saved_info next_for_neutral;
1483
1484 if (bidi_it->scan_dir == 1)
1485 {
1486 /* There's no sense in trying to advance if we hit end of text. */
1487 if (bidi_it->ch == BIDI_EOB)
1488 return bidi_it->resolved_level;
1489
1490 /* Record the info about the previous character. */
1491 if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */
1492 && bidi_it->type != WEAK_BN)
1493 bidi_remember_char (&bidi_it->prev, bidi_it);
1494 if (bidi_it->type_after_w1 == STRONG_R
1495 || bidi_it->type_after_w1 == STRONG_L
1496 || bidi_it->type_after_w1 == STRONG_AL)
1497 bidi_remember_char (&bidi_it->last_strong, bidi_it);
1498 /* FIXME: it sounds like we don't need both prev and
1499 prev_for_neutral members, but I'm leaving them both for now. */
1500 if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L
1501 || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN)
1502 bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it);
1503
1504 /* If we overstepped the characters used for resolving neutrals
1505 and whitespace, invalidate their info in the iterator. */
1506 if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos)
1507 bidi_it->next_for_neutral.type = UNKNOWN_BT;
1508 if (bidi_it->next_en_pos >= 0
1509 && bidi_it->charpos >= bidi_it->next_en_pos)
1510 bidi_it->next_en_pos = -1;
1511 if (bidi_it->next_for_ws.type != UNKNOWN_BT
1512 && bidi_it->charpos >= bidi_it->next_for_ws.charpos)
1513 bidi_it->next_for_ws.type = UNKNOWN_BT;
1514
1515 /* This must be taken before we fill the iterator with the info
1516 about the next char. If we scan backwards, the iterator
1517 state must be already cached, so there's no need to know the
1518 embedding level of the previous character, since we will be
1519 returning to our caller shortly. */
1520 prev_level = bidi_it->level_stack[bidi_it->stack_idx].level;
1521 }
1522 next_for_neutral = bidi_it->next_for_neutral;
1523
1524 /* Perhaps it is already cached. */
1525 type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it);
1526 if (type != UNKNOWN_BT)
1527 {
1528 /* Don't lose the information for resolving neutrals! The
1529 cached states could have been cached before their
1530 next_for_neutral member was computed. If we are on our way
1531 forward, we can simply take the info from the previous
1532 state. */
1533 if (bidi_it->scan_dir == 1
1534 && bidi_it->next_for_neutral.type == UNKNOWN_BT)
1535 bidi_it->next_for_neutral = next_for_neutral;
1536
1537 /* If resolved_level is -1, it means this state was cached
1538 before it was completely resolved, so we cannot return
1539 it. */
1540 if (bidi_it->resolved_level != -1)
1541 return bidi_it->resolved_level;
1542 }
1543 if (bidi_it->scan_dir == -1)
1544 /* If we are going backwards, the iterator state is already cached
1545 from previous scans, and should be fully resolved. */
1546 abort ();
1547
1548 if (type == UNKNOWN_BT)
1549 type = bidi_type_of_next_char (bidi_it);
1550
1551 if (type == NEUTRAL_B)
1552 return bidi_it->resolved_level;
1553
1554 level = bidi_it->level_stack[bidi_it->stack_idx].level;
1555 if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */)
1556 || (type == WEAK_BN && prev_level == level))
1557 {
1558 if (bidi_it->next_for_neutral.type == UNKNOWN_BT)
1559 abort ();
1560
1561 /* If the cached state shows a neutral character, it was not
1562 resolved by bidi_resolve_neutral, so do it now. */
1563 type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type,
1564 bidi_it->next_for_neutral.type,
1565 level);
1566 }
1567
1568 if (!(type == STRONG_R
1569 || type == STRONG_L
1570 || type == WEAK_BN
1571 || type == WEAK_EN
1572 || type == WEAK_AN))
1573 abort ();
1574 bidi_it->type = type;
1575 bidi_check_type (bidi_it->type);
1576
1577 /* For L1 below, we need to know, for each WS character, whether
1578 it belongs to a sequence of WS characters preceeding a newline
1579 or a TAB or a paragraph separator. */
1580 if (bidi_it->orig_type == NEUTRAL_WS
1581 && bidi_it->next_for_ws.type == UNKNOWN_BT)
1582 {
1583 int ch;
1584 int clen = bidi_it->ch_len;
1585 int bpos = bidi_it->bytepos;
1586 int cpos = bidi_it->charpos;
1587 bidi_type_t chtype;
1588
1589 do {
1590 /*_fetch_multibyte_char_len = 1;*/
1591 ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
1592 bpos += clen;
1593 cpos++;
1594 clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
1595 if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
1596 chtype = NEUTRAL_B;
1597 else
1598 chtype = bidi_get_type (ch);
1599 } while (chtype == NEUTRAL_WS || chtype == WEAK_BN
1600 || bidi_explicit_dir_char (ch)); /* L1/Retaining */
1601 bidi_it->next_for_ws.type = chtype;
1602 bidi_check_type (bidi_it->next_for_ws.type);
1603 bidi_it->next_for_ws.charpos = cpos;
1604 bidi_it->next_for_ws.bytepos = bpos;
1605 }
1606
1607 /* Resolve implicit levels, with a twist: PDFs get the embedding
1608 level of the enbedding they terminate. See below for the
1609 reason. */
1610 if (bidi_it->orig_type == PDF
1611 /* Don't do this if this formatting code didn't change the
1612 embedding level due to invalid or empty embeddings. */
1613 && prev_level != level)
1614 {
1615 /* Don't look in UAX#9 for the reason for this: it's our own
1616 private quirk. The reason is that we want the formatting
1617 codes to be delivered so that they bracket the text of their
1618 embedding. For example, given the text
1619
1620 {RLO}teST{PDF}
1621
1622 we want it to be displayed as
1623
1624 {RLO}STet{PDF}
1625
1626 not as
1627
1628 STet{RLO}{PDF}
1629
1630 which will result because we bump up the embedding level as
1631 soon as we see the RLO and pop it as soon as we see the PDF,
1632 so RLO itself has the same embedding level as "teST", and
1633 thus would be normally delivered last, just before the PDF.
1634 The switch below fiddles with the level of PDF so that this
1635 ugly side effect does not happen.
1636
1637 (This is, of course, only important if the formatting codes
1638 are actually displayed, but Emacs does need to display them
1639 if the user wants to.) */
1640 level = prev_level;
1641 }
1642 else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
1643 || bidi_it->orig_type == NEUTRAL_S
1644 || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
1645 /* || bidi_it->ch == LINESEP_CHAR */
1646 || (bidi_it->orig_type == NEUTRAL_WS
1647 && (bidi_it->next_for_ws.type == NEUTRAL_B
1648 || bidi_it->next_for_ws.type == NEUTRAL_S)))
1649 level = bidi_it->level_stack[0].level;
1650 else if ((level & 1) == 0) /* I1 */
1651 {
1652 if (type == STRONG_R)
1653 level++;
1654 else if (type == WEAK_EN || type == WEAK_AN)
1655 level += 2;
1656 }
1657 else /* I2 */
1658 {
1659 if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN)
1660 level++;
1661 }
1662
1663 bidi_it->resolved_level = level;
1664 return level;
1665 }
1666
1667 /* Move to the other edge of a level given by LEVEL. If END_FLAG is
1668 non-zero, we are at the end of a level, and we need to prepare to
1669 resume the scan of the lower level.
1670
1671 If this level's other edge is cached, we simply jump to it, filling
1672 the iterator structure with the iterator state on the other edge.
1673 Otherwise, we walk the buffer until we come back to the same level
1674 as LEVEL.
1675
1676 Note: we are not talking here about a ``level run'' in the UAX#9
1677 sense of the term, but rather about a ``level'' which includes
1678 all the levels higher than it. In other words, given the levels
1679 like this:
1680
1681 11111112222222333333334443343222222111111112223322111
1682 A B C
1683
1684 and assuming we are at point A scanning left to right, this
1685 function moves to point C, whereas the UAX#9 ``level 2 run'' ends
1686 at point B. */
1687 static void
1688 bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag)
1689 {
1690 int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir;
1691 int idx;
1692
1693 /* Try the cache first. */
1694 if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0)
1695 bidi_cache_fetch_state (idx, bidi_it);
1696 else
1697 {
1698 int new_level;
1699
1700 if (end_flag)
1701 abort (); /* if we are at end of level, its edges must be cached */
1702
1703 bidi_cache_iterator_state (bidi_it, 1);
1704 do {
1705 new_level = bidi_level_of_next_char (bidi_it);
1706 bidi_cache_iterator_state (bidi_it, 1);
1707 } while (new_level >= level);
1708 }
1709 }
1710
1711 void
1712 bidi_get_next_char_visually (struct bidi_it *bidi_it)
1713 {
1714 int old_level, new_level, next_level;
1715 struct bidi_it prev_bidi_it;
1716
1717 if (bidi_it->scan_dir == 0)
1718 {
1719 bidi_it->scan_dir = 1; /* default to logical order */
1720 }
1721
1722 if (bidi_it->new_paragraph)
1723 bidi_paragraph_init (bidi_overriding_paragraph_direction, bidi_it);
1724 if (bidi_cache_idx == 0)
1725 bidi_copy_it (&prev_bidi_it, bidi_it);
1726
1727 old_level = bidi_it->resolved_level;
1728 new_level = bidi_level_of_next_char (bidi_it);
1729 if (bidi_it->ch == BIDI_EOB)
1730 return;
1731
1732 /* Reordering of resolved levels (clause L2) is implemented by
1733 jumping to the other edge of the level and flipping direction of
1734 scanning the buffer whenever we find a level change. */
1735 if (new_level != old_level)
1736 {
1737 int ascending = new_level > old_level;
1738 int level_to_search = ascending ? old_level + 1 : old_level;
1739 int incr = ascending ? 1 : -1;
1740 int expected_next_level = old_level + incr;
1741
1742 /* If we don't have anything cached yet, we need to cache the
1743 previous character we've seen, since we'll need it to record
1744 where to jump when the last non-base level is exhausted. */
1745 if (bidi_cache_idx == 0)
1746 bidi_cache_iterator_state (&prev_bidi_it, 1);
1747 /* Jump (or walk) to the other edge of this level. */
1748 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1749 /* Switch scan direction and peek at the next character in the
1750 new direction. */
1751 bidi_it->scan_dir = -bidi_it->scan_dir;
1752
1753 /* The following loop handles the case where the resolved level
1754 jumps by more than one. This is typical for numbers inside a
1755 run of text with left-to-right embedding direction, but can
1756 also happen in other situations. In those cases the decision
1757 where to continue after a level change, and in what direction,
1758 is tricky. For example, given a text like below:
1759
1760 abcdefgh
1761 11336622
1762
1763 (where the numbers below the text show the resolved levels),
1764 the result of reordering according to UAX#9 should be this:
1765
1766 efdcghba
1767
1768 This is implemented by the loop below which flips direction
1769 and jumps to the other edge of the level each time it finds
1770 the new level not to be the expected one. The expected level
1771 is always one more or one less than the previous one. */
1772 next_level = bidi_peek_at_next_level (bidi_it);
1773 while (next_level != expected_next_level)
1774 {
1775 expected_next_level += incr;
1776 level_to_search += incr;
1777 bidi_find_other_level_edge (bidi_it, level_to_search, !ascending);
1778 bidi_it->scan_dir = -bidi_it->scan_dir;
1779 next_level = bidi_peek_at_next_level (bidi_it);
1780 }
1781
1782 /* Finally, deliver the next character in the new direction. */
1783 next_level = bidi_level_of_next_char (bidi_it);
1784 }
1785
1786 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1787 {
1788 /* If we are at paragraph's base embedding level and beyond the
1789 last cached position, the cache's job is done and we can
1790 discard it. */
1791 if (bidi_it->resolved_level == bidi_it->level_stack[0].level
1792 && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos)
1793 bidi_cache_reset ();
1794 /* But as long as we are caching during forward scan, we must
1795 cache each state, or else the cache integrity will be
1796 compromised: it assumes cached states correspond to buffer
1797 positions 1:1. */
1798 else
1799 bidi_cache_iterator_state (bidi_it, 1);
1800 }
1801 }
1802
1803 /* This is meant to be called from within the debugger, whenever you
1804 wish to examine the cache contents. */
1805 void
1806 bidi_dump_cached_states (void)
1807 {
1808 int i;
1809 int ndigits = 1;
1810
1811 if (bidi_cache_idx == 0)
1812 {
1813 fprintf (stderr, "The cache is empty.\n");
1814 return;
1815 }
1816 fprintf (stderr, "Total of %d state%s in cache:\n",
1817 bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s");
1818
1819 for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10)
1820 ndigits++;
1821 fputs ("ch ", stderr);
1822 for (i = 0; i < bidi_cache_idx; i++)
1823 fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
1824 fputs ("\n", stderr);
1825 fputs ("lvl ", stderr);
1826 for (i = 0; i < bidi_cache_idx; i++)
1827 fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
1828 fputs ("\n", stderr);
1829 fputs ("pos ", stderr);
1830 for (i = 0; i < bidi_cache_idx; i++)
1831 fprintf (stderr, "%*d", ndigits, bidi_cache[i].charpos);
1832 fputs ("\n", stderr);
1833 }