* composite.c: Integer overflow fixes.
[bpt/emacs.git] / src / composite.c
CommitLineData
ca4c9455 1/* Composite sequence support.
73b0cd50 2 Copyright (C) 2001-2011 Free Software Foundation, Inc.
5df4f04c 3 Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
ce03bf76
KH
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
f30d8d94 6 Copyright (C) 2003, 2006
1527c36e
KH
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
ca4c9455
KH
9
10This file is part of GNU Emacs.
11
9ec0b715 12GNU Emacs is free software: you can redistribute it and/or modify
ca4c9455 13it under the terms of the GNU General Public License as published by
9ec0b715
GM
14the Free Software Foundation, either version 3 of the License, or
15(at your option) any later version.
ca4c9455
KH
16
17GNU Emacs is distributed in the hope that it will be useful,
18but WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20GNU General Public License for more details.
21
22You should have received a copy of the GNU General Public License
9ec0b715 23along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
ca4c9455
KH
24
25#include <config.h>
d7306fe6 26#include <setjmp.h>
ca4c9455
KH
27#include "lisp.h"
28#include "buffer.h"
1527c36e 29#include "character.h"
f5199465 30#include "coding.h"
ca4c9455 31#include "intervals.h"
58753d74
KH
32#include "window.h"
33#include "frame.h"
34#include "dispextern.h"
35#include "font.h"
f5199465
KH
36#include "termhooks.h"
37
ca4c9455
KH
38
/* Emacs uses the special text property `composition' to support
   character composition.  A sequence of characters that have the same
   (i.e. eq) `composition' property value is treated as a single
   composite sequence (hereafter simply called a `composition').
   Characters in a composition are all composed somehow on the screen.
44
45 The property value has this form when the composition is made:
46 ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
47 then turns to this form:
48 (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
49 when the composition is registered in composition_hash_table and
50 composition_table. These rather peculiar structures were designed
51 to make it easy to distinguish them quickly (we can do that by
52 checking only the first element) and to extract LENGTH (from the
53 former form) and COMPOSITION-ID (from the latter form).
54
55 We register a composition when it is displayed, or when the width
56 is required (for instance, to calculate columns).
57
58 LENGTH -- Length of the composition. This information is used to
59 check the validity of the composition.
60
61 COMPONENTS -- Character, string, vector, list, or nil.
62
63 If it is nil, characters in the text are composed relatively
64 according to their metrics in font glyphs.
65
66 If it is a character or a string, the character or characters
67 in the string are composed relatively.
68
69 If it is a vector or list of integers, the element is a
70 character or an encoded composition rule. The characters are
71 composed according to the rules. (2N)th elements are
72 characters to be composed and (2N+1)th elements are
73 composition rules to tell how to compose (2N+2)th element with
74 the previously composed 2N glyphs.
75
   COMPONENTS-VEC -- Vector of integers.  In a relative composition,
	the elements are the characters to be composed.  In a rule-based
	composition, the elements are characters or encoded
	composition rules.
80
81 MODIFICATION-FUNC -- If non nil, it is a function to call when the
82 composition gets invalid after a modification in a buffer. If
83 it is nil, a function in `composition-function-table' of the
84 first character in the sequence is called.
85
86 COMPOSITION-ID --Identification number of the composition. It is
87 used as an index to composition_table for the composition.
88
89 When Emacs has to display a composition or has to know its
90 displaying width, the function get_composition_id is called. It
91 returns COMPOSITION-ID so that the caller can access the
92 information about the composition through composition_table. If a
93 COMPOSITION-ID has not yet been assigned to the composition,
94 get_composition_id checks the validity of `composition' property,
95 and, if valid, assigns a new ID, registers the information in
96 composition_hash_table and composition_table, and changes the form
763d7377
EZ
97 of the property value. If the property is invalid,
98 get_composition_id returns -1 without changing the property value.
ca4c9455 99
763d7377 100 We use two tables to keep the information about composition;
ca4c9455
KH
101 composition_hash_table and composition_table.
102
763d7377 103 The former is a hash table whose keys are COMPONENTS-VECs and
ca4c9455 104 values are the corresponding COMPOSITION-IDs. This hash table is
4abc7470 105 weak, but as each key (COMPONENTS-VEC) is also kept as a value of the
ca4c9455 106 `composition' property, it won't be collected as garbage until all
4abc7470 107 bits of text that have the same COMPONENTS-VEC are deleted.
ca4c9455
KH
108
109 The latter is a table of pointers to `struct composition' indexed
4abc7470 110 by COMPOSITION-ID. This structure keeps the other information (see
ca4c9455
KH
111 composite.h).
112
113 In general, a text property holds information about individual
114 characters. But, a `composition' property holds information about
4abc7470 115 a sequence of characters (in this sense, it is like the `intangible'
ca4c9455 116 property). That means that we should not share the property value
4abc7470 117 in adjacent compositions -- we can't distinguish them if they have the
ca4c9455
KH
118 same property. So, after any changes, we call
119 `update_compositions' and change a property of one of adjacent
120 compositions to a copy of it. This function also runs a proper
121 composition modification function to make a composition that gets
122 invalid by the change valid again.
123
   As the value of the `composition' property holds information about a
   specific range of text, the value becomes invalid if we change the
   text in the range.  We treat the `composition' property as always
   rear-nonsticky (currently by setting default-text-properties to
   (rear-nonsticky (composition))) and we never make properties of
   adjacent compositions identical.  Thus, any such change can only
   make the range shorter.  So, we can check the validity of the
   `composition' property by comparing the LENGTH information with the
   actual length of the composition.
133
134*/
135
136
137Lisp_Object Qcomposition;
138
139/* Table of pointers to the structure `composition' indexed by
140 COMPOSITION-ID. This structure is for storing information about
141 each composition except for COMPONENTS-VEC. */
142struct composition **composition_table;
143
144/* The current size of `composition_table'. */
ebfa62c0 145static ptrdiff_t composition_table_size;
ca4c9455
KH
146
147/* Number of compositions currently made. */
ebfa62c0 148ptrdiff_t n_compositions;
ca4c9455
KH
149
150/* Hash table for compositions. The key is COMPONENTS-VEC of
151 `composition' property. The value is the corresponding
152 COMPOSITION-ID. */
153Lisp_Object composition_hash_table;
154
955cbe7b
PE
155static Lisp_Object Qauto_composed;
156static Lisp_Object Qauto_composition_function;
763d7377
EZ
157/* Maximum number of characters to look back for
158 auto-compositions. */
895416e3
KH
159#define MAX_AUTO_COMPOSITION_LOOKBACK 3
160
cd64ea1d
PE
161static Lisp_Object Fcomposition_get_gstring (Lisp_Object, Lisp_Object,
162 Lisp_Object, Lisp_Object);
ea058d2c 163
ca4c9455
KH
164/* Temporary variable used in macros COMPOSITION_XXX. */
165Lisp_Object composition_temp;
f30d8d94 166
ca4c9455 167\f
ca4c9455
KH
168/* Return COMPOSITION-ID of a composition at buffer position
169 CHARPOS/BYTEPOS and length NCHARS. The `composition' property of
170 the sequence is PROP. STRING, if non-nil, is a string that
171 contains the composition instead of the current buffer.
172
173 If the composition is invalid, return -1. */
174
ebfa62c0 175ptrdiff_t
579c18d0
EZ
176get_composition_id (EMACS_INT charpos, EMACS_INT bytepos, EMACS_INT nchars,
177 Lisp_Object prop, Lisp_Object string)
ca4c9455
KH
178{
179 Lisp_Object id, length, components, key, *key_contents;
d86d0d74 180 ptrdiff_t glyph_len;
ca4c9455 181 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (composition_hash_table);
d3411f89 182 ptrdiff_t hash_index;
0de4bb68 183 EMACS_UINT hash_code;
d86d0d74 184 enum composition_method method;
ca4c9455 185 struct composition *cmp;
579c18d0
EZ
186 EMACS_INT i;
187 int ch;
ca4c9455 188
d86d0d74
PE
189 /* Maximum length of a string of glyphs. XftGlyphExtents limits this
190 to INT_MAX, and Emacs may limit it further. */
191 enum {
192 glyph_len_max =
193 min (INT_MAX,
194 (min (PTRDIFF_MAX, SIZE_MAX)
195 / max (MAX_MULTIBYTE_LENGTH, 2 * sizeof (short))))
196 };
197
ca4c9455
KH
198 /* PROP should be
199 Form-A: ((LENGTH . COMPONENTS) . MODIFICATION-FUNC)
200 or
201 Form-B: (COMPOSITION-ID . (LENGTH COMPONENTS-VEC . MODIFICATION-FUNC))
202 */
203 if (nchars == 0 || !CONSP (prop))
204 goto invalid_composition;
205
206 id = XCAR (prop);
207 if (INTEGERP (id))
208 {
209 /* PROP should be Form-B. */
210 if (XINT (id) < 0 || XINT (id) >= n_compositions)
211 goto invalid_composition;
212 return XINT (id);
213 }
214
215 /* PROP should be Form-A.
216 Thus, ID should be (LENGTH . COMPONENTS). */
217 if (!CONSP (id))
218 goto invalid_composition;
219 length = XCAR (id);
220 if (!INTEGERP (length) || XINT (length) != nchars)
221 goto invalid_composition;
222
223 components = XCDR (id);
224
225 /* Check if the same composition has already been registered or not
226 by consulting composition_hash_table. The key for this table is
227 COMPONENTS (converted to a vector COMPONENTS-VEC) or, if it is
228 nil, vector of characters in the composition range. */
229 if (INTEGERP (components))
230 key = Fmake_vector (make_number (1), components);
231 else if (STRINGP (components) || CONSP (components))
232 key = Fvconcat (1, &components);
233 else if (VECTORP (components))
234 key = components;
235 else if (NILP (components))
236 {
237 key = Fmake_vector (make_number (nchars), Qnil);
238 if (STRINGP (string))
239 for (i = 0; i < nchars; i++)
240 {
241 FETCH_STRING_CHAR_ADVANCE (ch, string, charpos, bytepos);
242 XVECTOR (key)->contents[i] = make_number (ch);
243 }
244 else
245 for (i = 0; i < nchars; i++)
246 {
247 FETCH_CHAR_ADVANCE (ch, charpos, bytepos);
248 XVECTOR (key)->contents[i] = make_number (ch);
249 }
250 }
251 else
252 goto invalid_composition;
253
254 hash_index = hash_lookup (hash_table, key, &hash_code);
255 if (hash_index >= 0)
256 {
257 /* We have already registered the same composition. Change PROP
258 from Form-A above to Form-B while replacing COMPONENTS with
259 COMPONENTS-VEC stored in the hash table. We can directly
260 modify the cons cell of PROP because it is not shared. */
261 key = HASH_KEY (hash_table, hash_index);
262 id = HASH_VALUE (hash_table, hash_index);
f3fbd155
KR
263 XSETCAR (prop, id);
264 XSETCDR (prop, Fcons (make_number (nchars), Fcons (key, XCDR (prop))));
ca4c9455
KH
265 return XINT (id);
266 }
267
268 /* This composition is a new one. We must register it. */
177c0ea7 269
ca4c9455
KH
270 /* Check if we have sufficient memory to store this information. */
271 if (composition_table_size == 0)
272 {
ca4c9455 273 composition_table
ebfa62c0
PE
274 = (struct composition **) xmalloc (sizeof (composition_table[0]) * 256);
275 composition_table_size = 256;
ca4c9455
KH
276 }
277 else if (composition_table_size <= n_compositions)
278 {
ebfa62c0
PE
279 if ((min (MOST_POSITIVE_FIXNUM,
280 min (PTRDIFF_MAX, SIZE_MAX) / sizeof composition_table[0])
281 - 256)
282 < composition_table_size)
283 memory_full (SIZE_MAX);
ca4c9455
KH
284 composition_table
285 = (struct composition **) xrealloc (composition_table,
286 sizeof (composition_table[0])
ebfa62c0
PE
287 * (composition_table_size + 256));
288 composition_table_size += 256;
ca4c9455
KH
289 }
290
291 key_contents = XVECTOR (key)->contents;
292
293 /* Check if the contents of COMPONENTS are valid if COMPONENTS is a
294 vector or a list. It should be a sequence of:
295 char1 rule1 char2 rule2 char3 ... ruleN charN+1 */
f30d8d94 296
dd5e1ed4 297 if (VECTORP (components)
f30d8d94
KH
298 && ASIZE (components) >= 2
299 && VECTORP (AREF (components, 0)))
300 {
301 /* COMPONENTS is a glyph-string. */
7d100a81 302 EMACS_INT len = ASIZE (key);
f30d8d94
KH
303
304 for (i = 1; i < len; i++)
305 if (! VECTORP (AREF (key, i)))
306 goto invalid_composition;
307 }
dd5e1ed4 308 else if (VECTORP (components) || CONSP (components))
ca4c9455 309 {
7d100a81 310 EMACS_INT len = ASIZE (key);
ca4c9455
KH
311
312 /* The number of elements should be odd. */
313 if ((len % 2) == 0)
314 goto invalid_composition;
315 /* All elements should be integers (character or encoded
316 composition rule). */
317 for (i = 0; i < len; i++)
318 {
319 if (!INTEGERP (key_contents[i]))
320 goto invalid_composition;
321 }
322 }
323
324 /* Change PROP from Form-A above to Form-B. We can directly modify
325 the cons cell of PROP because it is not shared. */
326 XSETFASTINT (id, n_compositions);
f3fbd155
KR
327 XSETCAR (prop, id);
328 XSETCDR (prop, Fcons (make_number (nchars), Fcons (key, XCDR (prop))));
ca4c9455
KH
329
330 /* Register the composition in composition_hash_table. */
331 hash_index = hash_put (hash_table, key, id, hash_code);
332
d86d0d74
PE
333 method = (NILP (components)
334 ? COMPOSITION_RELATIVE
335 : ((INTEGERP (components) || STRINGP (components))
336 ? COMPOSITION_WITH_ALTCHARS
337 : COMPOSITION_WITH_RULE_ALTCHARS));
338
339 glyph_len = (method == COMPOSITION_WITH_RULE_ALTCHARS
340 ? (ASIZE (key) + 1) / 2
341 : ASIZE (key));
342
343 if (glyph_len_max < glyph_len)
344 memory_full (SIZE_MAX);
345
ca4c9455
KH
346 /* Register the composition in composition_table. */
347 cmp = (struct composition *) xmalloc (sizeof (struct composition));
348
d86d0d74 349 cmp->method = method;
ca4c9455 350 cmp->hash_index = hash_index;
ca4c9455
KH
351 cmp->glyph_len = glyph_len;
352 cmp->offsets = (short *) xmalloc (sizeof (short) * glyph_len * 2);
353 cmp->font = NULL;
354
58753d74 355 if (cmp->method != COMPOSITION_WITH_RULE_ALTCHARS)
ca4c9455
KH
356 {
357 /* Relative composition. */
358 cmp->width = 0;
359 for (i = 0; i < glyph_len; i++)
360 {
361 int this_width;
362 ch = XINT (key_contents[i]);
8e86803c 363 this_width = (ch == '\t' ? 1 : CHAR_WIDTH (ch));
ca4c9455
KH
364 if (cmp->width < this_width)
365 cmp->width = this_width;
366 }
367 }
368 else
369 {
370 /* Rule-base composition. */
371 float leftmost = 0.0, rightmost;
372
373 ch = XINT (key_contents[0]);
69df789a 374 rightmost = ch != '\t' ? CHAR_WIDTH (ch) : 1;
ca4c9455
KH
375
376 for (i = 1; i < glyph_len; i += 2)
377 {
b13a45c6 378 int rule, gref, nref;
ca4c9455
KH
379 int this_width;
380 float this_left;
381
382 rule = XINT (key_contents[i]);
383 ch = XINT (key_contents[i + 1]);
69df789a 384 this_width = ch != '\t' ? CHAR_WIDTH (ch) : 1;
ca4c9455
KH
385
386 /* A composition rule is specified by an integer value
387 that encodes global and new reference points (GREF and
388 NREF). GREF and NREF are specified by numbers as
389 below:
390 0---1---2 -- ascent
391 | |
392 | |
393 | |
394 9--10--11 -- center
395 | |
396 ---3---4---5--- baseline
397 | |
398 6---7---8 -- descent
399 */
b13a45c6 400 COMPOSITION_DECODE_REFS (rule, gref, nref);
ca4c9455
KH
401 this_left = (leftmost
402 + (gref % 3) * (rightmost - leftmost) / 2.0
403 - (nref % 3) * this_width / 2.0);
404
405 if (this_left < leftmost)
406 leftmost = this_left;
407 if (this_left + this_width > rightmost)
408 rightmost = this_left + this_width;
409 }
410
411 cmp->width = rightmost - leftmost;
412 if (cmp->width < (rightmost - leftmost))
413 /* To get a ceiling integer value. */
414 cmp->width++;
415 }
416
417 composition_table[n_compositions] = cmp;
418
419 return n_compositions++;
420
421 invalid_composition:
422 /* Would it be better to remove this `composition' property? */
423 return -1;
424}
425
426\f
90b3fe91
KH
427/* Find a static composition at or nearest to position POS of OBJECT
428 (buffer or string).
ca4c9455
KH
429
430 OBJECT defaults to the current buffer. If there's a composition at
431 POS, set *START and *END to the start and end of the sequence,
432 *PROP to the `composition' property, and return 1.
433
434 If there's no composition at POS and LIMIT is negative, return 0.
435
436 Otherwise, search for a composition forward (LIMIT > POS) or
437 backward (LIMIT < POS). In this case, LIMIT bounds the search.
438
439 If a composition is found, set *START, *END, and *PROP as above,
440 and return 1, else return 0.
441
442 This doesn't check the validity of composition. */
443
444int
579c18d0
EZ
445find_composition (EMACS_INT pos, EMACS_INT limit,
446 EMACS_INT *start, EMACS_INT *end,
447 Lisp_Object *prop, Lisp_Object object)
ca4c9455
KH
448{
449 Lisp_Object val;
450
451 if (get_property_and_range (pos, Qcomposition, prop, start, end, object))
452 return 1;
453
454 if (limit < 0 || limit == pos)
455 return 0;
456
457 if (limit > pos) /* search forward */
d279f620
KH
458 {
459 val = Fnext_single_property_change (make_number (pos), Qcomposition,
460 object, make_number (limit));
461 pos = XINT (val);
462 if (pos == limit)
463 return 0;
464 }
ca4c9455 465 else /* search backward */
d279f620
KH
466 {
467 if (get_property_and_range (pos - 1, Qcomposition, prop, start, end,
468 object))
469 return 1;
470 val = Fprevious_single_property_change (make_number (pos), Qcomposition,
471 object, make_number (limit));
472 pos = XINT (val);
473 if (pos == limit)
474 return 0;
475 pos--;
476 }
ca4c9455
KH
477 get_property_and_range (pos, Qcomposition, prop, start, end, object);
478 return 1;
479}
480
481/* Run a proper function to adjust the composition sitting between
482 FROM and TO with property PROP. */
483
484static void
579c18d0 485run_composition_function (EMACS_INT from, EMACS_INT to, Lisp_Object prop)
ca4c9455 486{
7d019510 487 Lisp_Object func;
aaefca97 488 EMACS_INT start, end;
ca4c9455
KH
489
490 func = COMPOSITION_MODIFICATION_FUNC (prop);
491 /* If an invalid composition precedes or follows, try to make them
492 valid too. */
493 if (from > BEGV
494 && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
495 && !COMPOSITION_VALID_P (start, end, prop))
496 from = start;
497 if (to < ZV
498 && find_composition (to, -1, &start, &end, &prop, Qnil)
499 && !COMPOSITION_VALID_P (start, end, prop))
500 to = end;
775b3d2d 501 if (!NILP (Ffboundp (func)))
ca4c9455 502 call2 (func, make_number (from), make_number (to));
ca4c9455
KH
503}
504
505/* Make invalid compositions adjacent to or inside FROM and TO valid.
506 CHECK_MASK is bitwise `or' of mask bits defined by macros
507 CHECK_XXX (see the comment in composite.h).
508
b418f8a6 509 It also resets the text-property `auto-composed' to a proper region
f96ba4c1
KH
510 so that automatic character composition works correctly later while
511 displaying the region.
8f924df7 512
ca4c9455
KH
513 This function is called when a buffer text is changed. If the
514 change is deletion, FROM == TO. Otherwise, FROM < TO. */
515
516void
971de7fb 517update_compositions (EMACS_INT from, EMACS_INT to, int check_mask)
ca4c9455 518{
7d019510 519 Lisp_Object prop;
aaefca97 520 EMACS_INT start, end;
f96ba4c1
KH
521 /* The beginning and end of the region to set the property
522 `auto-composed' to nil. */
aaefca97 523 EMACS_INT min_pos = from, max_pos = to;
ca4c9455 524
6c1aa7f1
GM
525 if (inhibit_modification_hooks)
526 return;
177c0ea7 527
d3f40cbd
KH
528 /* If FROM and TO are not in a valid range, do nothing. */
529 if (! (BEGV <= from && from <= to && to <= ZV))
530 return;
531
ca4c9455
KH
532 if (check_mask & CHECK_HEAD)
533 {
534 /* FROM should be at composition boundary. But, insertion or
535 deletion will make two compositions adjacent and
536 indistinguishable when they have same (eq) property. To
537 avoid it, in such a case, we change the property of the
538 latter to the copy of it. */
539 if (from > BEGV
553d3164
KH
540 && find_composition (from - 1, -1, &start, &end, &prop, Qnil)
541 && COMPOSITION_VALID_P (start, end, prop))
ca4c9455 542 {
f96ba4c1
KH
543 min_pos = start;
544 if (end > to)
545 max_pos = end;
ca4c9455
KH
546 if (from < end)
547 Fput_text_property (make_number (from), make_number (end),
548 Qcomposition,
549 Fcons (XCAR (prop), XCDR (prop)), Qnil);
550 run_composition_function (start, end, prop);
551 from = end;
552 }
dd33cc56 553 else if (from < ZV
553d3164 554 && find_composition (from, -1, &start, &from, &prop, Qnil)
9657d668 555 && COMPOSITION_VALID_P (start, from, prop))
f96ba4c1
KH
556 {
557 if (from > to)
558 max_pos = from;
559 run_composition_function (start, from, prop);
560 }
ca4c9455
KH
561 }
562
563 if (check_mask & CHECK_INSIDE)
564 {
565 /* In this case, we are sure that (check & CHECK_TAIL) is also
566 nonzero. Thus, here we should check only compositions before
567 (to - 1). */
568 while (from < to - 1
569 && find_composition (from, to, &start, &from, &prop, Qnil)
9657d668 570 && COMPOSITION_VALID_P (start, from, prop)
ca4c9455
KH
571 && from < to - 1)
572 run_composition_function (start, from, prop);
573 }
574
575 if (check_mask & CHECK_TAIL)
576 {
577 if (from < to
553d3164
KH
578 && find_composition (to - 1, -1, &start, &end, &prop, Qnil)
579 && COMPOSITION_VALID_P (start, end, prop))
ca4c9455
KH
580 {
581 /* TO should be also at composition boundary. But,
582 insertion or deletion will make two compositions adjacent
583 and indistinguishable when they have same (eq) property.
584 To avoid it, in such a case, we change the property of
585 the former to the copy of it. */
586 if (to < end)
f96ba4c1
KH
587 {
588 Fput_text_property (make_number (start), make_number (to),
589 Qcomposition,
590 Fcons (XCAR (prop), XCDR (prop)), Qnil);
591 max_pos = end;
592 }
ca4c9455
KH
593 run_composition_function (start, end, prop);
594 }
595 else if (to < ZV
553d3164
KH
596 && find_composition (to, -1, &start, &end, &prop, Qnil)
597 && COMPOSITION_VALID_P (start, end, prop))
f96ba4c1
KH
598 {
599 run_composition_function (start, end, prop);
600 max_pos = end;
601 }
ca4c9455 602 }
f96ba4c1 603 if (min_pos < max_pos)
9d440521
KH
604 {
605 int count = SPECPDL_INDEX ();
606
607 specbind (Qinhibit_read_only, Qt);
608 specbind (Qinhibit_modification_hooks, Qt);
609 specbind (Qinhibit_point_motion_hooks, Qt);
610 Fremove_list_of_text_properties (make_number (min_pos),
611 make_number (max_pos),
612 Fcons (Qauto_composed, Qnil), Qnil);
613 unbind_to (count, Qnil);
614 }
ca4c9455
KH
615}
616
c1361885
KH
617
618/* Modify composition property values in LIST destructively. LIST is
619 a list as returned from text_property_list. Change values to the
620 top-level copies of them so that none of them are `eq'. */
621
622void
971de7fb 623make_composition_value_copy (Lisp_Object list)
c1361885
KH
624{
625 Lisp_Object plist, val;
626
627 for (; CONSP (list); list = XCDR (list))
628 {
629 plist = XCAR (XCDR (XCDR (XCAR (list))));
630 while (CONSP (plist) && CONSP (XCDR (plist)))
631 {
632 if (EQ (XCAR (plist), Qcomposition)
633 && (val = XCAR (XCDR (plist)), CONSP (val)))
f3fbd155 634 XSETCAR (XCDR (plist), Fcons (XCAR (val), XCDR (val)));
c1361885
KH
635 plist = XCDR (XCDR (plist));
636 }
637 }
638}
639
640
ca4c9455
KH
641/* Make text in the region between START and END a composition that
642 has COMPONENTS and MODIFICATION-FUNC.
643
644 If STRING is non-nil, then operate on characters contained between
645 indices START and END in STRING. */
646
647void
579c18d0
EZ
648compose_text (EMACS_INT start, EMACS_INT end, Lisp_Object components,
649 Lisp_Object modification_func, Lisp_Object string)
ca4c9455
KH
650{
651 Lisp_Object prop;
652
653 prop = Fcons (Fcons (make_number (end - start), components),
654 modification_func);
655 Fput_text_property (make_number (start), make_number (end),
656 Qcomposition, prop, string);
657}
58753d74
KH
658
659
f57e2426
J
660static Lisp_Object autocmp_chars (Lisp_Object, EMACS_INT, EMACS_INT,
661 EMACS_INT, struct window *,
662 struct face *, Lisp_Object);
58753d74
KH
663
664\f
665/* Lisp glyph-string handlers */
666
667/* Hash table for automatic composition. The key is a header of a
668 lgstring (Lispy glyph-string), and the value is a body of a
669 lgstring. */
670
671static Lisp_Object gstring_hash_table;
672
f57e2426 673static Lisp_Object gstring_lookup_cache (Lisp_Object);
58753d74
KH
674
675static Lisp_Object
971de7fb 676gstring_lookup_cache (Lisp_Object header)
58753d74
KH
677{
678 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
d3411f89 679 ptrdiff_t i = hash_lookup (h, header, NULL);
58753d74
KH
680
681 return (i >= 0 ? HASH_VALUE (h, i) : Qnil);
682}
683
684Lisp_Object
dcd5c89a 685composition_gstring_put_cache (Lisp_Object gstring, EMACS_INT len)
58753d74
KH
686{
687 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
0de4bb68 688 EMACS_UINT hash;
58753d74 689 Lisp_Object header, copy;
dcd5c89a 690 EMACS_INT i;
58753d74
KH
691
692 header = LGSTRING_HEADER (gstring);
693 hash = h->hashfn (h, header);
694 if (len < 0)
695 {
7d100a81 696 EMACS_INT j, glyph_len = LGSTRING_GLYPH_LEN (gstring);
27ccc379
PE
697 for (j = 0; j < glyph_len; j++)
698 if (NILP (LGSTRING_GLYPH (gstring, j)))
58753d74 699 break;
27ccc379 700 len = j;
58753d74 701 }
087e2ea9 702
f2ed8a70 703 lint_assume (len <= TYPE_MAXIMUM (EMACS_INT) - 2);
58753d74
KH
704 copy = Fmake_vector (make_number (len + 2), Qnil);
705 LGSTRING_SET_HEADER (copy, Fcopy_sequence (header));
706 for (i = 0; i < len; i++)
707 LGSTRING_SET_GLYPH (copy, i, Fcopy_sequence (LGSTRING_GLYPH (gstring, i)));
708 i = hash_put (h, LGSTRING_HEADER (copy), copy, hash);
709 LGSTRING_SET_ID (copy, make_number (i));
710 return copy;
711}
712
713Lisp_Object
ebfa62c0 714composition_gstring_from_id (ptrdiff_t id)
58753d74
KH
715{
716 struct Lisp_Hash_Table *h = XHASH_TABLE (gstring_hash_table);
717
718 return HASH_VALUE (h, id);
719}
720
f57e2426
J
721static Lisp_Object fill_gstring_header (Lisp_Object, Lisp_Object,
722 Lisp_Object, Lisp_Object,
723 Lisp_Object);
58753d74
KH
724
725int
971de7fb 726composition_gstring_p (Lisp_Object gstring)
58753d74
KH
727{
728 Lisp_Object header;
5eb55db9 729 EMACS_INT i;
58753d74
KH
730
731 if (! VECTORP (gstring) || ASIZE (gstring) < 2)
732 return 0;
733 header = LGSTRING_HEADER (gstring);
734 if (! VECTORP (header) || ASIZE (header) < 2)
735 return 0;
736 if (! NILP (LGSTRING_FONT (gstring))
f5199465
KH
737 && (! FONT_OBJECT_P (LGSTRING_FONT (gstring))
738 && ! CODING_SYSTEM_P (LGSTRING_FONT (gstring))))
58753d74
KH
739 return 0;
740 for (i = 1; i < ASIZE (LGSTRING_HEADER (gstring)); i++)
741 if (! NATNUMP (AREF (LGSTRING_HEADER (gstring), i)))
742 return 0;
743 if (! NILP (LGSTRING_ID (gstring)) && ! NATNUMP (LGSTRING_ID (gstring)))
744 return 0;
745 for (i = 0; i < LGSTRING_GLYPH_LEN (gstring); i++)
746 {
747 Lisp_Object glyph = LGSTRING_GLYPH (gstring, i);
748 if (NILP (glyph))
749 break;
750 if (! VECTORP (glyph) || ASIZE (glyph) != LGLYPH_SIZE)
751 return 0;
752 }
753 return 1;
754}
755
756int
579c18d0
EZ
757composition_gstring_width (Lisp_Object gstring, EMACS_INT from, EMACS_INT to,
758 struct font_metrics *metrics)
58753d74
KH
759{
760 Lisp_Object *glyph;
761 int width = 0;
762
763 if (metrics)
764 {
765 Lisp_Object font_object = LGSTRING_FONT (gstring);
58753d74 766
f5199465
KH
767 if (FONT_OBJECT_P (font_object))
768 {
769 struct font *font = XFONT_OBJECT (font_object);
770
771 metrics->ascent = font->ascent;
772 metrics->descent = font->descent;
773 }
774 else
775 {
776 metrics->ascent = 1;
777 metrics->descent = 0;
778 }
58753d74
KH
779 metrics->width = metrics->lbearing = metrics->rbearing = 0;
780 }
781 for (glyph = &LGSTRING_GLYPH (gstring, from); from < to; from++, glyph++)
782 {
783 int x;
784
785 if (NILP (LGLYPH_ADJUSTMENT (*glyph)))
786 width += LGLYPH_WIDTH (*glyph);
787 else
788 width += LGLYPH_WADJUST (*glyph);
789 if (metrics)
790 {
791 x = metrics->width + LGLYPH_LBEARING (*glyph) + LGLYPH_XOFF (*glyph);
792 if (metrics->lbearing > x)
793 metrics->lbearing = x;
794 x = metrics->width + LGLYPH_RBEARING (*glyph) + LGLYPH_XOFF (*glyph);
795 if (metrics->rbearing < x)
796 metrics->rbearing = x;
797 metrics->width = width;
798 x = LGLYPH_ASCENT (*glyph) - LGLYPH_YOFF (*glyph);
799 if (metrics->ascent < x)
800 metrics->ascent = x;
895416e3 801 x = LGLYPH_DESCENT (*glyph) + LGLYPH_YOFF (*glyph);
58753d74
KH
802 if (metrics->descent < x)
803 metrics->descent = x;
804 }
805 }
806 return width;
807}
808
809
810static Lisp_Object gstring_work;
811static Lisp_Object gstring_work_headers;
812
813static Lisp_Object
971de7fb 814fill_gstring_header (Lisp_Object header, Lisp_Object start, Lisp_Object end, Lisp_Object font_object, Lisp_Object string)
58753d74
KH
815{
816 EMACS_INT from, to, from_byte;
817 EMACS_INT len, i;
818
819 if (NILP (string))
820 {
4b4deea2 821 if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
58753d74
KH
822 error ("Attempt to shape unibyte text");
823 validate_region (&start, &end);
824 from = XFASTINT (start);
825 to = XFASTINT (end);
826 from_byte = CHAR_TO_BYTE (from);
827 }
828 else
829 {
830 CHECK_STRING (string);
793ffee8 831 if (! STRING_MULTIBYTE (string))
58753d74 832 error ("Attempt to shape unibyte text");
ea8ba975 833 /* FROM and TO are checked by the caller. */
58753d74 834 from = XINT (start);
58753d74
KH
835 to = XINT (end);
836 if (from < 0 || from > to || to > SCHARS (string))
837 args_out_of_range_3 (string, start, end);
838 from_byte = string_char_to_byte (string, from);
839 }
840
841 len = to - from;
842 if (len == 0)
843 error ("Attempt to shape zero-length text");
844 if (VECTORP (header))
845 {
846 if (ASIZE (header) != len + 1)
847 args_out_of_range (header, make_number (len + 1));
848 }
849 else
850 {
851 if (len <= 8)
852 header = AREF (gstring_work_headers, len - 1);
853 else
854 header = Fmake_vector (make_number (len + 1), Qnil);
855 }
856
857 ASET (header, 0, font_object);
858 for (i = 0; i < len; i++)
859 {
860 int c;
861
862 if (NILP (string))
863 FETCH_CHAR_ADVANCE_NO_CHECK (c, from, from_byte);
864 else
865 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string, from, from_byte);
866 ASET (header, i + 1, make_number (c));
867 }
868 return header;
869}
870
58753d74 871static void
971de7fb 872fill_gstring_body (Lisp_Object gstring)
58753d74
KH
873{
874 Lisp_Object font_object = LGSTRING_FONT (gstring);
875 Lisp_Object header = AREF (gstring, 0);
876 EMACS_INT len = LGSTRING_CHAR_LEN (gstring);
877 EMACS_INT i;
878
879 for (i = 0; i < len; i++)
880 {
881 Lisp_Object g = LGSTRING_GLYPH (gstring, i);
f8c86b69 882 int c = XFASTINT (AREF (header, i + 1));
58753d74
KH
883
884 if (NILP (g))
885 {
886 g = LGLYPH_NEW ();
887 LGSTRING_SET_GLYPH (gstring, i, g);
888 }
889 LGLYPH_SET_FROM (g, i);
890 LGLYPH_SET_TO (g, i);
891 LGLYPH_SET_CHAR (g, c);
f5199465 892 if (FONT_OBJECT_P (font_object))
58753d74
KH
893 {
894 font_fill_lglyph_metrics (g, font_object);
895 }
896 else
897 {
898 int width = XFASTINT (CHAR_TABLE_REF (Vchar_width_table, c));
899
900 LGLYPH_SET_CODE (g, c);
901 LGLYPH_SET_LBEARING (g, 0);
902 LGLYPH_SET_RBEARING (g, width);
903 LGLYPH_SET_WIDTH (g, width);
904 LGLYPH_SET_ASCENT (g, 1);
905 LGLYPH_SET_DESCENT (g, 0);
906 }
907 LGLYPH_SET_ADJUSTMENT (g, Qnil);
908 }
909 if (i < LGSTRING_GLYPH_LEN (gstring))
910 LGSTRING_SET_GLYPH (gstring, i, Qnil);
911}
912
58753d74 913
10f72a37
KH
914/* Try to compose the characters at CHARPOS according to composition
915 rule RULE ([PATTERN PREV-CHARS FUNC]). LIMIT limits the characters
916 to compose. STRING, if not nil, is a target string. WIN is a
917 window where the characters are being displayed. If characters are
918 successfully composed, return the composition as a glyph-string
919 object. Otherwise return nil. */
58753d74
KH
920
921static Lisp_Object
971de7fb 922autocmp_chars (Lisp_Object rule, EMACS_INT charpos, EMACS_INT bytepos, EMACS_INT limit, struct window *win, struct face *face, Lisp_Object string)
58753d74
KH
923{
924 int count = SPECPDL_INDEX ();
925 FRAME_PTR f = XFRAME (win->frame);
926 Lisp_Object pos = make_number (charpos);
10f72a37 927 EMACS_INT to;
58753d74 928 EMACS_INT pt = PT, pt_byte = PT_BYTE;
10f72a37 929 Lisp_Object re, font_object, lgstring;
579c18d0 930 EMACS_INT len;
087e2ea9 931
58753d74 932 record_unwind_save_match_data ();
10f72a37
KH
933 re = AREF (rule, 0);
934 if (NILP (re))
935 len = 1;
936 else if (! STRINGP (re))
937 return unbind_to (count, Qnil);
938 else if ((len = fast_looking_at (re, charpos, bytepos, limit, -1, string))
939 > 0)
58753d74 940 {
10f72a37
KH
941 if (NILP (string))
942 len = BYTE_TO_CHAR (bytepos + len) - charpos;
943 else
944 len = string_byte_to_char (string, bytepos + len) - charpos;
945 }
946 if (len <= 0)
947 return unbind_to (count, Qnil);
948 to = limit = charpos + len;
58753d74 949#ifdef HAVE_WINDOW_SYSTEM
10f72a37
KH
950 if (FRAME_WINDOW_P (f))
951 {
952 font_object = font_range (charpos, &to, win, face, string);
953 if (! FONT_OBJECT_P (font_object)
954 || (! NILP (re)
955 && to < limit
956 && (fast_looking_at (re, charpos, bytepos, to, -1, string) <= 0)))
957 return unbind_to (count, Qnil);
958 }
959 else
58753d74 960#endif /* not HAVE_WINDOW_SYSTEM */
10f72a37
KH
961 font_object = win->frame;
962 lgstring = Fcomposition_get_gstring (pos, make_number (to), font_object,
963 string);
964 if (NILP (LGSTRING_ID (lgstring)))
965 {
966 Lisp_Object args[6];
5dcde606 967
10f72a37
KH
968 /* Save point as marker before calling out to lisp. */
969 if (NILP (string))
970 {
971 Lisp_Object m = Fmake_marker ();
972 set_marker_both (m, Qnil, pt, pt_byte);
973 record_unwind_protect (restore_point_unwind, m);
58753d74 974 }
10f72a37
KH
975
976 args[0] = Vauto_composition_function;
977 args[1] = AREF (rule, 2);
978 args[2] = pos;
979 args[3] = make_number (to);
980 args[4] = font_object;
981 args[5] = string;
982 lgstring = safe_call (6, args);
983 if (NILP (string))
984 TEMP_SET_PT_BOTH (pt, pt_byte);
58753d74 985 }
10f72a37 986 return unbind_to (count, lgstring);
58753d74
KH
987}
988
82ebc97b 989static Lisp_Object _work_val;
82ebc97b 990
f6aa6ec6
KH
991/* 1 iff the character C is composable. Characters of general
992 category Z? or C? are not composable except for ZWNJ and ZWJ. */
993
82ebc97b 994#define CHAR_COMPOSABLE_P(C) \
a28d4396
KH
995 ((C) > ' ' \
996 && ((C) == 0x200C || (C) == 0x200D \
997 || (_work_val = CHAR_TABLE_REF (Vunicode_category_table, (C)), \
c805dec0
KH
998 (INTEGERP (_work_val) \
999 && (XINT (_work_val) <= UNICODE_CATEGORY_So)))))
58753d74
KH
1000
1001/* Update cmp_it->stop_pos to the next position after CHARPOS (and
1002 BYTEPOS) where character composition may happen. If BYTEPOS is
82ebc97b
KH
1003 negative, compute it. ENDPOS is a limit of searching. If it is
1004 less than CHARPOS, search backward to ENDPOS+1 assuming that
1005 set_iterator_to_next works in reverse order. In this case, if a
1006 composition closest to CHARPOS is found, set cmp_it->stop_pos to
1007 the last character of the composition.
1008
1009 If no composition is found, set cmp_it->ch to -2. If a static
1010 composition is found, set cmp_it->ch to -1. Otherwise, set
1011 cmp_it->ch to the character that triggers the automatic
1012 composition. */
58753d74
KH
1013
1014void
971de7fb 1015composition_compute_stop_pos (struct composition_it *cmp_it, EMACS_INT charpos, EMACS_INT bytepos, EMACS_INT endpos, Lisp_Object string)
58753d74 1016{
f8c86b69
PE
1017 EMACS_INT start, end;
1018 int c;
58753d74 1019 Lisp_Object prop, val;
3ffdafce
KH
1020 /* This is from forward_to_next_line_start in xdisp.c. */
1021 const int MAX_NEWLINE_DISTANCE = 500;
58753d74 1022
82ebc97b
KH
1023 if (charpos < endpos)
1024 {
1025 if (endpos > charpos + MAX_NEWLINE_DISTANCE)
1026 endpos = charpos + MAX_NEWLINE_DISTANCE;
1027 }
1028 else if (endpos < charpos)
1029 {
1030 /* We search backward for a position to check composition. */
1031 if (endpos < 0)
1032 {
1033 /* But we don't know where to stop the searching. */
1034 endpos = NILP (string) ? BEGV - 1 : -1;
1035 /* Usually we don't reach ENDPOS because we stop searching
1036 at an uncomposable character (NL, LRE, etc). */
1037 }
1038 }
053ca52b 1039 cmp_it->id = -1;
44566dc7 1040 cmp_it->ch = -2;
82ebc97b
KH
1041 cmp_it->reversed_p = 0;
1042 cmp_it->stop_pos = endpos;
1043 if (charpos == endpos)
1044 return;
1045 /* FIXME: Bidi is not yet handled well in static composition. */
1046 if (charpos < endpos
1047 && find_composition (charpos, endpos, &start, &end, &prop, string)
15fa4783 1048 && start >= charpos
58753d74
KH
1049 && COMPOSITION_VALID_P (start, end, prop))
1050 {
1051 cmp_it->stop_pos = endpos = start;
1052 cmp_it->ch = -1;
1053 }
4b4deea2 1054 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
d9a7c140 1055 || NILP (Vauto_composition_mode))
58753d74
KH
1056 return;
1057 if (bytepos < 0)
1058 {
82ebc97b 1059 if (NILP (string))
58753d74 1060 bytepos = CHAR_TO_BYTE (charpos);
82ebc97b
KH
1061 else
1062 bytepos = string_char_to_byte (string, charpos);
58753d74
KH
1063 }
1064
1065 start = charpos;
82ebc97b 1066 if (charpos < endpos)
58753d74 1067 {
82ebc97b
KH
1068 /* Forward search. */
1069 while (charpos < endpos)
ea8ba975 1070 {
82ebc97b
KH
1071 if (STRINGP (string))
1072 FETCH_STRING_CHAR_ADVANCE (c, string, charpos, bytepos);
1073 else
1074 FETCH_CHAR_ADVANCE (c, charpos, bytepos);
1075 if (c == '\n')
1076 {
1077 cmp_it->ch = -2;
1078 break;
1079 }
1080 val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1081 if (! NILP (val))
1082 {
1083 Lisp_Object elt;
10f72a37 1084 int ridx;
82ebc97b 1085
10f72a37 1086 for (ridx = 0; CONSP (val); val = XCDR (val), ridx++)
82ebc97b
KH
1087 {
1088 elt = XCAR (val);
1089 if (VECTORP (elt) && ASIZE (elt) == 3
1090 && NATNUMP (AREF (elt, 1))
1091 && charpos - 1 - XFASTINT (AREF (elt, 1)) >= start)
1092 break;
1093 }
1094 if (CONSP (val))
1095 {
10f72a37 1096 cmp_it->rule_idx = ridx;
82ebc97b
KH
1097 cmp_it->lookback = XFASTINT (AREF (elt, 1));
1098 cmp_it->stop_pos = charpos - 1 - cmp_it->lookback;
1099 cmp_it->ch = c;
1100 return;
1101 }
1102 }
ea8ba975 1103 }
ff94e32c
KH
1104 if (charpos == endpos)
1105 {
1106 /* We couldn't find a composition point before ENDPOS. But,
1107 some character after ENDPOS may be composed with
1108 characters before ENDPOS. So, we should stop at the safe
1109 point. */
1110 charpos = endpos - MAX_AUTO_COMPOSITION_LOOKBACK;
1111 if (charpos < start)
1112 charpos = start;
1113 }
82ebc97b 1114 }
b7050583 1115 else if (charpos > endpos)
82ebc97b
KH
1116 {
1117 /* Search backward for a pattern that may be composed and the
1118 position of (possibly) the last character of the match is
1119 closest to (but not after) START. The reason for the last
d07f018c
EZ
1120 character is that set_iterator_to_next works in reverse order,
1121 and thus we must stop at the last character for composition
82ebc97b
KH
1122 check. */
1123 unsigned char *p;
1124 int len;
d07f018c
EZ
1125 /* Limit byte position used in fast_looking_at. This is the
1126 byte position of the character after START. */
82ebc97b
KH
1127 EMACS_INT limit;
1128
1129 if (NILP (string))
1130 p = BYTE_POS_ADDR (bytepos);
1131 else
1132 p = SDATA (string) + bytepos;
1133 c = STRING_CHAR_AND_LENGTH (p, len);
1134 limit = bytepos + len;
1135 while (CHAR_COMPOSABLE_P (c))
58753d74 1136 {
10f72a37
KH
1137 val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1138 if (! NILP (val))
82ebc97b 1139 {
10f72a37 1140 Lisp_Object elt;
8f5201ae 1141 int ridx, back, blen;
58753d74 1142
10f72a37 1143 for (ridx = 0; CONSP (val); val = XCDR (val), ridx++)
82ebc97b 1144 {
10f72a37
KH
1145 elt = XCAR (val);
1146 if (VECTORP (elt) && ASIZE (elt) == 3
1147 && NATNUMP (AREF (elt, 1))
1148 && charpos - (back = XFASTINT (AREF (elt, 1))) > endpos)
82ebc97b 1149 {
10f72a37
KH
1150 EMACS_INT cpos = charpos - back, bpos;
1151
1152 if (back == 0)
1153 bpos = bytepos;
82ebc97b 1154 else
10f72a37
KH
1155 bpos = (NILP (string) ? CHAR_TO_BYTE (cpos)
1156 : string_char_to_byte (string, cpos));
1157 if (STRINGP (AREF (elt, 0)))
8f5201ae
PE
1158 blen = fast_looking_at (AREF (elt, 0), cpos, bpos,
1159 start + 1, limit, string);
10f72a37 1160 else
8f5201ae
PE
1161 blen = 1;
1162 if (blen > 0)
82ebc97b 1163 {
10f72a37 1164 /* Make CPOS point to the last character of
8f5201ae
PE
1165 match. Note that BLEN is byte-length. */
1166 if (blen > 1)
10f72a37 1167 {
8f5201ae 1168 bpos += blen;
10f72a37
KH
1169 if (NILP (string))
1170 cpos = BYTE_TO_CHAR (bpos) - 1;
1171 else
1172 cpos = string_byte_to_char (string, bpos) - 1;
1173 }
1174 back = cpos - (charpos - back);
1175 if (cmp_it->stop_pos < cpos
1176 || (cmp_it->stop_pos == cpos
1177 && cmp_it->lookback < back))
1178 {
1179 cmp_it->rule_idx = ridx;
1180 cmp_it->stop_pos = cpos;
1181 cmp_it->ch = c;
1182 cmp_it->lookback = back;
1183 cmp_it->nchars = back + 1;
1184 }
82ebc97b
KH
1185 }
1186 }
1187 }
1188 }
1189 if (charpos - 1 == endpos)
1190 break;
1191 if (STRINGP (string))
58753d74 1192 {
82ebc97b
KH
1193 p--, bytepos--;
1194 while (! CHAR_HEAD_P (*p))
1195 p--, bytepos--;
1196 charpos--;
1197 }
1198 else
1199 {
1200 DEC_BOTH (charpos, bytepos);
1201 p = BYTE_POS_ADDR (bytepos);
58753d74 1202 }
82ebc97b
KH
1203 c = STRING_CHAR (p);
1204 }
1205 if (cmp_it->ch >= 0)
1206 /* We found a position to check. */
1207 return;
1208 /* Skip all uncomposable characters. */
1209 if (NILP (string))
1210 {
1211 while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c))
58753d74 1212 {
82ebc97b
KH
1213 DEC_BOTH (charpos, bytepos);
1214 c = FETCH_MULTIBYTE_CHAR (bytepos);
1215 }
1216 }
1217 else
1218 {
1219 while (charpos - 1 > endpos && ! CHAR_COMPOSABLE_P (c))
1220 {
1221 p--;
1222 while (! CHAR_HEAD_P (*p))
1223 p--;
1224 charpos--;
1225 c = STRING_CHAR (p);
58753d74
KH
1226 }
1227 }
1228 }
44566dc7 1229 cmp_it->stop_pos = charpos;
58753d74
KH
1230}
1231
1232/* Check if the character at CHARPOS (and BYTEPOS) is composed
22e33406 1233 (possibly with the following characters) on window W. ENDPOS limits
58753d74
KH
1234 characters to be composed. FACE, in non-NULL, is a base face of
1235 the character. If STRING is not nil, it is a string containing the
1236 character to check, and CHARPOS and BYTEPOS are indices in the
1237 string. In that case, FACE must not be NULL.
1238
1239 If the character is composed, setup members of CMP_IT (id, nglyphs,
82ebc97b
KH
1240 from, to, reversed_p), and return 1. Otherwise, update
1241 CMP_IT->stop_pos, and return 0. */
58753d74
KH
1242
1243int
971de7fb 1244composition_reseat_it (struct composition_it *cmp_it, EMACS_INT charpos, EMACS_INT bytepos, EMACS_INT endpos, struct window *w, struct face *face, Lisp_Object string)
58753d74 1245{
f6aa6ec6
KH
1246 if (endpos < 0)
1247 endpos = NILP (string) ? BEGV : 0;
e614ea00 1248
3ffdafce
KH
1249 if (cmp_it->ch == -2)
1250 {
1251 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
10f72a37 1252 if (cmp_it->ch == -2 || cmp_it->stop_pos != charpos)
82ebc97b 1253 /* The current position is not composed. */
3ffdafce
KH
1254 return 0;
1255 }
1256
58753d74
KH
1257 if (cmp_it->ch < 0)
1258 {
1259 /* We are looking at a static composition. */
1260 EMACS_INT start, end;
1261 Lisp_Object prop;
1262
1263 find_composition (charpos, -1, &start, &end, &prop, string);
1264 cmp_it->id = get_composition_id (charpos, bytepos, end - start,
1265 prop, string);
1266 if (cmp_it->id < 0)
1267 goto no_composition;
1268 cmp_it->nchars = end - start;
1269 cmp_it->nglyphs = composition_table[cmp_it->id]->glyph_len;
1270 }
29fb7306 1271 else if (w)
58753d74 1272 {
10f72a37 1273 Lisp_Object lgstring = Qnil;
b246f932 1274 Lisp_Object val, elt;
5eb55db9 1275 EMACS_INT i;
58753d74
KH
1276
1277 val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
10f72a37 1278 for (i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val));
82ebc97b 1279 if (charpos < endpos)
90b3fe91 1280 {
82ebc97b
KH
1281 for (; CONSP (val); val = XCDR (val))
1282 {
1283 elt = XCAR (val);
10f72a37
KH
1284 if (! VECTORP (elt) || ASIZE (elt) != 3
1285 || ! INTEGERP (AREF (elt, 1)))
1286 continue;
1287 if (XFASTINT (AREF (elt, 1)) != cmp_it->lookback)
aa3830c4 1288 goto no_composition;
10f72a37
KH
1289 lgstring = autocmp_chars (elt, charpos, bytepos, endpos,
1290 w, face, string);
1291 if (composition_gstring_p (lgstring))
82ebc97b 1292 break;
10f72a37
KH
1293 lgstring = Qnil;
1294 /* Composition failed perhaps because the font doesn't
1295 support sufficient range of characters. Try the
1296 other composition rules if any. */
82ebc97b 1297 }
82ebc97b 1298 cmp_it->reversed_p = 0;
90b3fe91 1299 }
82ebc97b
KH
1300 else
1301 {
10f72a37 1302 EMACS_INT cpos = charpos, bpos = bytepos;
90b3fe91 1303
10f72a37 1304 while (1)
82ebc97b 1305 {
10f72a37
KH
1306 elt = XCAR (val);
1307 if (cmp_it->lookback > 0)
1308 {
f6aa6ec6 1309 cpos = charpos - cmp_it->lookback;
10f72a37
KH
1310 if (STRINGP (string))
1311 bpos = string_char_to_byte (string, cpos);
1312 else
1313 bpos = CHAR_TO_BYTE (cpos);
1314 }
1315 lgstring = autocmp_chars (elt, cpos, bpos, charpos + 1, w, face,
1316 string);
1317 if (composition_gstring_p (lgstring)
1318 && cpos + LGSTRING_CHAR_LEN (lgstring) - 1 == charpos)
1319 break;
1320 /* Composition failed or didn't cover the current
1321 character. */
1322 if (cmp_it->lookback == 0)
82ebc97b 1323 goto no_composition;
10f72a37
KH
1324 lgstring = Qnil;
1325 /* Try to find a shorter compostion that starts after CPOS. */
1326 composition_compute_stop_pos (cmp_it, charpos, bytepos, cpos,
1327 string);
1328 if (cmp_it->ch == -2 || cmp_it->stop_pos < charpos)
1329 goto no_composition;
1330 val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch);
1331 for (i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val));
82ebc97b
KH
1332 }
1333 cmp_it->reversed_p = 1;
1334 }
10f72a37
KH
1335 if (NILP (lgstring))
1336 goto no_composition;
1337 if (NILP (LGSTRING_ID (lgstring)))
1338 lgstring = composition_gstring_put_cache (lgstring, -1);
1339 cmp_it->id = XINT (LGSTRING_ID (lgstring));
1340 for (i = 0; i < LGSTRING_GLYPH_LEN (lgstring); i++)
1341 if (NILP (LGSTRING_GLYPH (lgstring, i)))
58753d74
KH
1342 break;
1343 cmp_it->nglyphs = i;
82ebc97b
KH
1344 cmp_it->from = 0;
1345 cmp_it->to = i;
58753d74 1346 }
29fb7306
KH
1347 else
1348 goto no_composition;
58753d74
KH
1349 return 1;
1350
1351 no_composition:
82ebc97b
KH
1352 if (charpos == endpos)
1353 return 0;
1354 if (charpos < endpos)
1355 {
1356 charpos++;
10f72a37 1357 if (NILP (string))
82ebc97b 1358 INC_POS (bytepos);
10f72a37 1359 else
aa3830c4 1360 bytepos += BYTES_BY_CHAR_HEAD (*(SDATA (string) + bytepos));
82ebc97b 1361 }
58753d74 1362 else
82ebc97b
KH
1363 {
1364 charpos--;
1365 /* BYTEPOS is calculated in composition_compute_stop_pos */
1366 bytepos = -1;
1367 }
58753d74
KH
1368 composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string);
1369 return 0;
1370}
1371
10f72a37
KH
1372/* Update charpos, nchars, nbytes, and width of the current grapheme
1373 cluster.
1374
1375 If the composition is static or automatic in L2R context, the
1376 cluster is identified by CMP_IT->from, and CHARPOS is the position
1377 of the first character of the cluster. In this case, update
1378 CMP_IT->to too.
1379
1380 If the composition is automatic in R2L context, the cluster is
1381 identified by CMP_IT->to, and CHARPOS is the position of the last
1382 character of the cluster. In this case, update CMP_IT->from too.
1383
1384 The return value is the character code of the first character of
1385 the cluster, or -1 if the composition is somehow broken. */
82ebc97b 1386
58753d74 1387int
971de7fb 1388composition_update_it (struct composition_it *cmp_it, EMACS_INT charpos, EMACS_INT bytepos, Lisp_Object string)
58753d74 1389{
80e079b2 1390 int i, c IF_LINT (= 0);
58753d74
KH
1391
1392 if (cmp_it->ch < 0)
1393 {
10f72a37 1394 /* static composition */
58753d74
KH
1395 struct composition *cmp = composition_table[cmp_it->id];
1396
10f72a37 1397 cmp_it->charpos = charpos;
58753d74
KH
1398 cmp_it->to = cmp_it->nglyphs;
1399 if (cmp_it->nglyphs == 0)
1400 c = -1;
1401 else
1402 {
1403 for (i = 0; i < cmp->glyph_len; i++)
1404 if ((c = COMPOSITION_GLYPH (cmp, i)) != '\t')
1405 break;
1406 if (c == '\t')
1407 c = ' ';
1408 }
1409 cmp_it->width = cmp->width;
10f72a37
KH
1410 charpos += cmp_it->nchars;
1411 if (STRINGP (string))
1412 cmp_it->nbytes = string_char_to_byte (string, charpos) - bytepos;
1413 else
1414 cmp_it->nbytes = CHAR_TO_BYTE (charpos) - bytepos;
58753d74
KH
1415 }
1416 else
1417 {
10f72a37 1418 /* automatic composition */
58753d74 1419 Lisp_Object gstring = composition_gstring_from_id (cmp_it->id);
10f72a37 1420 Lisp_Object glyph;
579c18d0 1421 EMACS_INT from;
58753d74
KH
1422
1423 if (cmp_it->nglyphs == 0)
1424 {
58753d74
KH
1425 cmp_it->nchars = LGSTRING_CHAR_LEN (gstring);
1426 cmp_it->width = 0;
10f72a37
KH
1427 cmp_it->from = cmp_it->to = 0;
1428 return -1;
58753d74 1429 }
10f72a37 1430 if (! cmp_it->reversed_p)
58753d74 1431 {
10f72a37
KH
1432 glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
1433 from = LGLYPH_FROM (glyph);
58753d74
KH
1434 for (cmp_it->to = cmp_it->from + 1; cmp_it->to < cmp_it->nglyphs;
1435 cmp_it->to++)
1436 {
1437 glyph = LGSTRING_GLYPH (gstring, cmp_it->to);
1438 if (LGLYPH_FROM (glyph) != from)
1439 break;
58753d74 1440 }
10f72a37 1441 cmp_it->charpos = charpos;
58753d74 1442 }
82ebc97b
KH
1443 else
1444 {
10f72a37
KH
1445 glyph = LGSTRING_GLYPH (gstring, cmp_it->to - 1);
1446 from = LGLYPH_FROM (glyph);
1447 cmp_it->charpos = charpos - (LGLYPH_TO (glyph) - from);
1448 for (cmp_it->from = cmp_it->to - 1; cmp_it->from > 0;
1449 cmp_it->from--)
82ebc97b 1450 {
10f72a37 1451 glyph = LGSTRING_GLYPH (gstring, cmp_it->from - 1);
82ebc97b
KH
1452 if (LGLYPH_FROM (glyph) != from)
1453 break;
82ebc97b 1454 }
10f72a37
KH
1455 }
1456 glyph = LGSTRING_GLYPH (gstring, cmp_it->from);
1457 cmp_it->nchars = LGLYPH_TO (glyph) + 1 - from;
1458 cmp_it->nbytes = 0;
1459 cmp_it->width = 0;
1460 for (i = cmp_it->nchars - 1; i >= 0; i--)
1461 {
1462 c = XINT (LGSTRING_CHAR (gstring, i));
1463 cmp_it->nbytes += CHAR_BYTES (c);
63257f6c 1464 cmp_it->width += CHAR_WIDTH (c);
82ebc97b 1465 }
58753d74 1466 }
58753d74
KH
1467 return c;
1468}
1469
1470
90b3fe91
KH
1471struct position_record
1472{
1473 EMACS_INT pos, pos_byte;
1474 unsigned char *p;
1475};
1476
/* Advance the members of POSITION to the next character boundary.
   When the new character position equals STOP, the byte position
   and the address are reset to GPT_BYTE and GAP_END_ADDR instead of
   being advanced by the length of the current character.  */
#define FORWARD_CHAR(POSITION, STOP)                                    \
  do {                                                                  \
    if (++(POSITION).pos != (STOP))                                     \
      {                                                                 \
        (POSITION).pos_byte += BYTES_BY_CHAR_HEAD (*((POSITION).p));    \
        (POSITION).p += BYTES_BY_CHAR_HEAD (*((POSITION).p));           \
      }                                                                 \
    else                                                                \
      {                                                                 \
        (POSITION).pos_byte = GPT_BYTE;                                 \
        (POSITION).p = GAP_END_ADDR;                                    \
      }                                                                 \
  } while (0)
1492
/* Move the members of POSITION back to the previous character
   boundary.  When the current character position equals STOP, the
   address is first reset to GPT_ADDR; it is then moved back byte by
   byte until it points at a character head.  */
#define BACKWARD_CHAR(POSITION, STOP)                                   \
  do {                                                                  \
    if ((STOP) == (POSITION).pos)                                       \
      (POSITION).p = GPT_ADDR;                                          \
    (POSITION).pos--;                                                   \
    do                                                                  \
      (POSITION).pos_byte--;                                            \
    while (! CHAR_HEAD_P (*--(POSITION).p));                            \
  } while (0)
1504
90b3fe91 1505/* This is like find_composition, but find an automatic composition
a28d4396
KH
1506 instead. It is assured that POS is not within a static
1507 composition. If found, set *GSTRING to the glyph-string
1508 representing the composition, and return 1. Otherwise, *GSTRING to
1509 Qnil, and return 0. */
90b3fe91
KH
1510
1511static int
a28d4396
KH
1512find_automatic_composition (EMACS_INT pos, EMACS_INT limit,
1513 EMACS_INT *start, EMACS_INT *end,
1514 Lisp_Object *gstring, Lisp_Object string)
90b3fe91
KH
1515{
1516 EMACS_INT head, tail, stop;
a28d4396
KH
1517 /* Forward limit position of checking a composition taking a
1518 looking-back count into account. */
895416e3 1519 EMACS_INT fore_check_limit;
a28d4396 1520 struct position_record cur, prev;
90b3fe91 1521 int c;
29fb7306 1522 Lisp_Object window;
90b3fe91 1523 struct window *w;
a28d4396 1524 int need_adjustment = 0;
90b3fe91 1525
3eda4b19 1526 window = Fget_buffer_window (Fcurrent_buffer (), Qnil);
29fb7306
KH
1527 if (NILP (window))
1528 return 0;
1529 w = XWINDOW (window);
1530
a28d4396 1531 cur.pos = pos;
90b3fe91
KH
1532 if (NILP (string))
1533 {
1534 head = BEGV, tail = ZV, stop = GPT;
a28d4396
KH
1535 cur.pos_byte = CHAR_TO_BYTE (cur.pos);
1536 cur.p = BYTE_POS_ADDR (cur.pos_byte);
90b3fe91
KH
1537 }
1538 else
1539 {
1540 head = 0, tail = SCHARS (string), stop = -1;
a28d4396
KH
1541 cur.pos_byte = string_char_to_byte (string, cur.pos);
1542 cur.p = SDATA (string) + cur.pos_byte;
90b3fe91 1543 }
a28d4396
KH
1544 if (limit < 0)
1545 /* Finding a composition covering the character after POS is the
1546 same as setting LIMIT to POS. */
1547 limit = pos;
1548 if (limit <= pos)
1549 fore_check_limit = min (tail, pos + 1 + MAX_AUTO_COMPOSITION_LOOKBACK);
90b3fe91 1550 else
895416e3 1551 fore_check_limit = min (tail, limit + MAX_AUTO_COMPOSITION_LOOKBACK);
90b3fe91 1552
a28d4396 1553 /* Provided that we have these possible compositions now:
895416e3 1554
a28d4396
KH
1555 POS: 1 2 3 4 5 6 7 8 9
1556 |-A-|
1557 |-B-|-C-|--D--|
895416e3 1558
a28d4396
KH
1559 Here, it is known that characters after positions 1 and 9 can
1560 never be composed (i.e. ! CHAR_COMPOSABLE_P (CH)), and
1561 composition A is an invalid one because it's partially covered by
1562 the valid composition C. And to know whether a composition is
1563 valid or not, the only way is to start searching forward from a
1564 position that can not be a tail part of composition (it's 2 in
1565 the above case).
1566
1567 Now we have these cases (1 through 4):
1568
1569 -- character after POS is ... --
1570 not composable composable
1571 LIMIT <= POS (1) (3)
1572 POS < LIMIT (2) (4)
1573
1574 Among them, in case (2), we simply search forward from POS.
1575
1576 In the other cases, we at first rewind back to the position where
1577 the previous character is not composable or the beginning of
1578 buffer (string), then search compositions forward. In case (1)
1579 and (3) we repeat this process until a composition is found. */
1580
1581 while (1)
1582 {
62a6e103 1583 c = STRING_CHAR (cur.p);
90b3fe91 1584 if (! CHAR_COMPOSABLE_P (c))
90b3fe91 1585 {
a28d4396
KH
1586 if (limit <= pos) /* case (1) */
1587 {
1588 do {
31bfc35c 1589 if (cur.pos <= limit)
a28d4396
KH
1590 return 0;
1591 BACKWARD_CHAR (cur, stop);
1592 c = STRING_CHAR (cur.p);
1593 } while (! CHAR_COMPOSABLE_P (c));
1594 fore_check_limit = cur.pos + 1;
1595 }
1596 else /* case (2) */
1597 /* No need of rewinding back. */
1598 goto search_forward;
1599 }
90b3fe91 1600
a28d4396
KH
1601 /* Rewind back to the position where we can safely search
1602 forward for compositions. It is assured that the character
1603 at cur.pos is composable. */
1604 while (head < cur.pos)
1605 {
1606 prev = cur;
1607 BACKWARD_CHAR (cur, stop);
1608 c = STRING_CHAR (cur.p);
1609 if (! CHAR_COMPOSABLE_P (c))
90b3fe91 1610 {
a28d4396
KH
1611 cur = prev;
1612 break;
90b3fe91 1613 }
a28d4396
KH
1614 }
1615
1616 search_forward:
1617 /* Now search forward. */
1618 *gstring = Qnil;
1619 prev = cur; /* remember the start of searching position. */
1620 while (cur.pos < fore_check_limit)
1621 {
1622 Lisp_Object val;
1623
1624 c = STRING_CHAR (cur.p);
1625 for (val = CHAR_TABLE_REF (Vcomposition_function_table, c);
1626 CONSP (val); val = XCDR (val))
90b3fe91 1627 {
a28d4396
KH
1628 Lisp_Object elt = XCAR (val);
1629
1630 if (VECTORP (elt) && ASIZE (elt) == 3 && NATNUMP (AREF (elt, 1)))
90b3fe91 1631 {
a28d4396
KH
1632 EMACS_INT check_pos = cur.pos - XFASTINT (AREF (elt, 1));
1633 struct position_record check;
1634
1635 if (check_pos < head
1636 || (limit <= pos ? pos < check_pos
1637 : limit <= check_pos))
1638 continue;
1639 for (check = cur; check_pos < check.pos; )
1640 BACKWARD_CHAR (check, stop);
1641 *gstring = autocmp_chars (elt, check.pos, check.pos_byte,
1642 tail, w, NULL, string);
90b3fe91 1643 need_adjustment = 1;
a28d4396 1644 if (NILP (*gstring))
90b3fe91 1645 {
a28d4396
KH
1646 /* As we have called Lisp, there's a possibility
1647 that buffer/string is relocated. */
1648 if (NILP (string))
1649 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1650 else
1651 cur.p = SDATA (string) + cur.pos_byte;
1652 }
1653 else
1654 {
1655 /* We found a candidate of a target composition. */
90b3fe91
KH
1656 *start = check.pos;
1657 *end = check.pos + LGSTRING_CHAR_LEN (*gstring);
a28d4396
KH
1658 if (pos < limit
1659 ? pos < *end
1660 : *start <= pos && pos < *end)
1661 /* This is the target composition. */
90b3fe91
KH
1662 return 1;
1663 cur.pos = *end;
a28d4396
KH
1664 if (NILP (string))
1665 {
1666 cur.pos_byte = CHAR_TO_BYTE (cur.pos);
1667 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1668 }
1669 else
1670 {
1671 cur.pos_byte = string_char_to_byte (string, cur.pos);
1672 cur.p = SDATA (string) + cur.pos_byte;
1673 }
90b3fe91
KH
1674 break;
1675 }
1676 }
1677 }
a28d4396
KH
1678 if (! CONSP (val))
1679 /* We found no composition here. */
90b3fe91 1680 FORWARD_CHAR (cur, stop);
90b3fe91 1681 }
a28d4396
KH
1682
1683 if (pos < limit) /* case (2) and (4)*/
1684 return 0;
1685 if (! NILP (*gstring))
1686 return 1;
1687 if (prev.pos == head)
1688 return 0;
90b3fe91 1689 cur = prev;
a28d4396
KH
1690 if (need_adjustment)
1691 {
1692 if (NILP (string))
1693 cur.p = BYTE_POS_ADDR (cur.pos_byte);
1694 else
1695 cur.p = SDATA (string) + cur.pos_byte;
1696 }
90b3fe91 1697 BACKWARD_CHAR (cur, stop);
90b3fe91 1698 }
90b3fe91
KH
1699}
1700
e614ea00
KH
1701/* Return the adjusted point provided that point is moved from LAST_PT
1702 to NEW_PT. */
1703
41118bd3 1704EMACS_INT
971de7fb 1705composition_adjust_point (EMACS_INT last_pt, EMACS_INT new_pt)
58753d74 1706{
5eb55db9 1707 EMACS_INT i, beg, end;
90b3fe91 1708 Lisp_Object val;
58753d74 1709
e614ea00
KH
1710 if (new_pt == BEGV || new_pt == ZV)
1711 return new_pt;
58753d74 1712
90b3fe91 1713 /* At first check the static composition. */
e614ea00 1714 if (get_property_and_range (new_pt, Qcomposition, &val, &beg, &end, Qnil)
895416e3
KH
1715 && COMPOSITION_VALID_P (beg, end, val))
1716 {
e614ea00 1717 if (beg < new_pt /* && end > new_pt <- It's always the case. */
895416e3 1718 && (last_pt <= beg || last_pt >= end))
e614ea00
KH
1719 return (new_pt < last_pt ? beg : end);
1720 return new_pt;
895416e3 1721 }
58753d74 1722
4b4deea2 1723 if (NILP (BVAR (current_buffer, enable_multibyte_characters))
d9a7c140 1724 || NILP (Vauto_composition_mode))
e614ea00 1725 return new_pt;
58753d74 1726
90b3fe91 1727 /* Next check the automatic composition. */
e614ea00
KH
1728 if (! find_automatic_composition (new_pt, (EMACS_INT) -1, &beg, &end, &val,
1729 Qnil)
1730 || beg == new_pt)
1731 return new_pt;
90b3fe91 1732 for (i = 0; i < LGSTRING_GLYPH_LEN (val); i++)
58753d74 1733 {
90b3fe91 1734 Lisp_Object glyph = LGSTRING_GLYPH (val, i);
58753d74 1735
90b3fe91
KH
1736 if (NILP (glyph))
1737 break;
e614ea00
KH
1738 if (beg + LGLYPH_FROM (glyph) == new_pt)
1739 return new_pt;
1740 if (beg + LGLYPH_TO (glyph) >= new_pt)
1741 return (new_pt < last_pt
90b3fe91
KH
1742 ? beg + LGLYPH_FROM (glyph)
1743 : beg + LGLYPH_TO (glyph) + 1);
58753d74 1744 }
e614ea00 1745 return new_pt;
58753d74
KH
1746}
1747
1748DEFUN ("composition-get-gstring", Fcomposition_get_gstring,
1749 Scomposition_get_gstring, 4, 4, 0,
1750 doc: /* Return a glyph-string for characters between FROM and TO.
9d751859 1751If the glyph string is for graphic display, FONT-OBJECT must be
58753d74 1752a font-object to use for those characters.
f5199465
KH
1753Otherwise (for terminal display), FONT-OBJECT must be a terminal ID, a
1754frame, or nil for the selected frame's terminal device.
58753d74
KH
1755
1756If the optional 4th argument STRING is not nil, it is a string
1757containing the target characters between indices FROM and TO.
1758
9d751859
EZ
1759A glyph-string is a vector containing information about how to display
1760a specific character sequence. The format is:
58753d74
KH
1761 [HEADER ID GLYPH ...]
1762
1763HEADER is a vector of this form:
1764 [FONT-OBJECT CHAR ...]
1765where
1766 FONT-OBJECT is a font-object for all glyphs in the glyph-string,
f5199465 1767 or the terminal coding system of the specified terminal.
58753d74
KH
1768 CHARs are characters to be composed by GLYPHs.
1769
1770ID is an identification number of the glyph-string. It may be nil if
1771not yet shaped.
1772
9d751859 1773GLYPH is a vector whose elements have this form:
58753d74
KH
1774 [ FROM-IDX TO-IDX C CODE WIDTH LBEARING RBEARING ASCENT DESCENT
1775 [ [X-OFF Y-OFF WADJUST] | nil] ]
1776where
1777 FROM-IDX and TO-IDX are used internally and should not be touched.
1778 C is the character of the glyph.
1779 CODE is the glyph-code of C in FONT-OBJECT.
1780 WIDTH thru DESCENT are the metrics (in pixels) of the glyph.
9d751859 1781 X-OFF and Y-OFF are offsets to the base position for the glyph.
58753d74
KH
1782 WADJUST is the adjustment to the normal width of the glyph.
1783
9d751859
EZ
1784If GLYPH is nil, the remaining elements of the glyph-string vector
1785should be ignored. */)
5842a27b 1786 (Lisp_Object from, Lisp_Object to, Lisp_Object font_object, Lisp_Object string)
58753d74
KH
1787{
1788 Lisp_Object gstring, header;
46f905e1 1789 EMACS_INT frompos, topos;
58753d74 1790
ea8ba975
KH
1791 CHECK_NATNUM (from);
1792 CHECK_NATNUM (to);
f5199465
KH
1793 if (! FONT_OBJECT_P (font_object))
1794 {
1795 struct coding_system *coding;
1796 struct terminal *terminal = get_terminal (font_object, 1);
1797
1798 coding = ((TERMINAL_TERMINAL_CODING (terminal)->common_flags
1799 & CODING_REQUIRE_ENCODING_MASK)
1800 ? TERMINAL_TERMINAL_CODING (terminal) : &safe_terminal_coding);
1801 font_object = CODING_ID_NAME (coding->id);
1802 }
1803
58753d74
KH
1804 header = fill_gstring_header (Qnil, from, to, font_object, string);
1805 gstring = gstring_lookup_cache (header);
1806 if (! NILP (gstring))
1807 return gstring;
46f905e1 1808
ea8ba975
KH
1809 frompos = XINT (from);
1810 topos = XINT (to);
46f905e1
SM
1811 if (LGSTRING_GLYPH_LEN (gstring_work) < topos - frompos)
1812 gstring_work = Fmake_vector (make_number (topos - frompos + 2), Qnil);
58753d74
KH
1813 LGSTRING_SET_HEADER (gstring_work, header);
1814 LGSTRING_SET_ID (gstring_work, Qnil);
1815 fill_gstring_body (gstring_work);
1816 return gstring_work;
1817}
1818
ca4c9455
KH
1819\f
1820/* Emacs Lisp APIs. */
1821
1822DEFUN ("compose-region-internal", Fcompose_region_internal,
1823 Scompose_region_internal, 2, 4, 0,
335c5470
PJ
1824 doc: /* Internal use only.
1825
1826Compose text in the region between START and END.
1827Optional 3rd and 4th arguments are COMPONENTS and MODIFICATION-FUNC
9d751859 1828for the composition. See `compose-region' for more details. */)
5842a27b 1829 (Lisp_Object start, Lisp_Object end, Lisp_Object components, Lisp_Object modification_func)
ca4c9455
KH
1830{
1831 validate_region (&start, &end);
1832 if (!NILP (components)
1833 && !INTEGERP (components)
1834 && !CONSP (components)
1835 && !STRINGP (components))
b7826503 1836 CHECK_VECTOR (components);
ca4c9455 1837
d562f8ab 1838 compose_text (XINT (start), XINT (end), components, modification_func, Qnil);
ca4c9455
KH
1839 return Qnil;
1840}
1841
1842DEFUN ("compose-string-internal", Fcompose_string_internal,
1843 Scompose_string_internal, 3, 5, 0,
335c5470
PJ
1844 doc: /* Internal use only.
1845
1846Compose text between indices START and END of STRING.
1847Optional 4th and 5th arguments are COMPONENTS and MODIFICATION-FUNC
9d751859 1848for the composition. See `compose-string' for more details. */)
5842a27b 1849 (Lisp_Object string, Lisp_Object start, Lisp_Object end, Lisp_Object components, Lisp_Object modification_func)
ca4c9455 1850{
b7826503
PJ
1851 CHECK_STRING (string);
1852 CHECK_NUMBER (start);
1853 CHECK_NUMBER (end);
ca4c9455
KH
1854
1855 if (XINT (start) < 0 ||
1856 XINT (start) > XINT (end)
d5db4077 1857 || XINT (end) > SCHARS (string))
ca4c9455
KH
1858 args_out_of_range (start, end);
1859
d562f8ab 1860 compose_text (XINT (start), XINT (end), components, modification_func, string);
ca4c9455
KH
1861 return string;
1862}
1863
1864DEFUN ("find-composition-internal", Ffind_composition_internal,
177c0ea7 1865 Sfind_composition_internal, 4, 4, 0,
335c5470
PJ
1866 doc: /* Internal use only.
1867
1868Return information about composition at or nearest to position POS.
9d751859 1869See `find-composition' for more details. */)
5842a27b 1870 (Lisp_Object pos, Lisp_Object limit, Lisp_Object string, Lisp_Object detail_p)
ca4c9455 1871{
90b3fe91
KH
1872 Lisp_Object prop, tail, gstring;
1873 EMACS_INT start, end, from, to;
ca4c9455
KH
1874 int id;
1875
b7826503 1876 CHECK_NUMBER_COERCE_MARKER (pos);
90b3fe91 1877 from = XINT (pos);
ca4c9455
KH
1878 if (!NILP (limit))
1879 {
b7826503 1880 CHECK_NUMBER_COERCE_MARKER (limit);
90b3fe91 1881 to = XINT (limit);
ca4c9455
KH
1882 }
1883 else
90b3fe91 1884 to = -1;
177c0ea7 1885
ca4c9455 1886 if (!NILP (string))
e3b3e327 1887 {
b7826503 1888 CHECK_STRING (string);
d5db4077 1889 if (XINT (pos) < 0 || XINT (pos) > SCHARS (string))
e3b3e327
GM
1890 args_out_of_range (string, pos);
1891 }
1892 else
1893 {
fa9090b8 1894 if (XINT (pos) < BEGV || XINT (pos) > ZV)
e3b3e327
GM
1895 args_out_of_range (Fcurrent_buffer (), pos);
1896 }
ca4c9455 1897
90b3fe91
KH
1898 if (!find_composition (from, to, &start, &end, &prop, string))
1899 {
4b4deea2 1900 if (!NILP (BVAR (current_buffer, enable_multibyte_characters))
d9a7c140 1901 && ! NILP (Vauto_composition_mode)
90b3fe91
KH
1902 && find_automatic_composition (from, to, &start, &end, &gstring,
1903 string))
1904 return list3 (make_number (start), make_number (end), gstring);
1905 return Qnil;
1906 }
1907 if ((end <= XINT (pos) || start > XINT (pos)))
1908 {
1909 EMACS_INT s, e;
1910
1911 if (find_automatic_composition (from, to, &s, &e, &gstring, string)
1912 && (e <= XINT (pos) ? e > end : s < start))
fcebfc6a 1913 return list3 (make_number (s), make_number (e), gstring);
90b3fe91 1914 }
ca4c9455
KH
1915 if (!COMPOSITION_VALID_P (start, end, prop))
1916 return Fcons (make_number (start), Fcons (make_number (end),
1917 Fcons (Qnil, Qnil)));
1918 if (NILP (detail_p))
1919 return Fcons (make_number (start), Fcons (make_number (end),
1920 Fcons (Qt, Qnil)));
1921
1922 if (COMPOSITION_REGISTERD_P (prop))
1923 id = COMPOSITION_ID (prop);
1924 else
1925 {
579c18d0
EZ
1926 EMACS_INT start_byte = (NILP (string)
1927 ? CHAR_TO_BYTE (start)
1928 : string_char_to_byte (string, start));
ca4c9455
KH
1929 id = get_composition_id (start, start_byte, end - start, prop, string);
1930 }
1931
1932 if (id >= 0)
1933 {
1934 Lisp_Object components, relative_p, mod_func;
1935 enum composition_method method = COMPOSITION_METHOD (prop);
1936 int width = composition_table[id]->width;
1937
1938 components = Fcopy_sequence (COMPOSITION_COMPONENTS (prop));
1939 relative_p = (method == COMPOSITION_WITH_RULE_ALTCHARS
1940 ? Qnil : Qt);
1941 mod_func = COMPOSITION_MODIFICATION_FUNC (prop);
1942 tail = Fcons (components,
1943 Fcons (relative_p,
1944 Fcons (mod_func,
1945 Fcons (make_number (width), Qnil))));
1946 }
1947 else
1948 tail = Qnil;
1949
1950 return Fcons (make_number (start), Fcons (make_number (end), tail));
1951}
1952
1953\f
1954void
971de7fb 1955syms_of_composite (void)
ca4c9455 1956{
58753d74
KH
1957 int i;
1958
cd3520a4 1959 DEFSYM (Qcomposition, "composition");
ca4c9455 1960
58753d74 1961 /* Make a hash table for static composition. */
ca4c9455 1962 {
09654086 1963 Lisp_Object args[6];
177c0ea7 1964
ca4c9455
KH
1965 args[0] = QCtest;
1966 args[1] = Qequal;
1967 args[2] = QCweakness;
dc47eccc 1968 /* We used to make the hash table weak so that unreferenced
ca101cff 1969 compositions can be garbage-collected. But, usually once
dc47eccc
KH
1970 created compositions are repeatedly used in an Emacs session,
1971 and thus it's not worth to save memory in such a way. So, we
1972 make the table not weak. */
6a83ee8a 1973 args[3] = Qnil;
ca4c9455
KH
1974 args[4] = QCsize;
1975 args[5] = make_number (311);
09654086 1976 composition_hash_table = Fmake_hash_table (6, args);
ca4c9455
KH
1977 staticpro (&composition_hash_table);
1978 }
1979
58753d74
KH
1980 /* Make a hash table for glyph-string. */
1981 {
1982 Lisp_Object args[6];
58753d74
KH
1983 args[0] = QCtest;
1984 args[1] = Qequal;
1985 args[2] = QCweakness;
1986 args[3] = Qnil;
1987 args[4] = QCsize;
1988 args[5] = make_number (311);
1989 gstring_hash_table = Fmake_hash_table (6, args);
1990 staticpro (&gstring_hash_table);
1991 }
1992
1993 staticpro (&gstring_work_headers);
1994 gstring_work_headers = Fmake_vector (make_number (8), Qnil);
1995 for (i = 0; i < 8; i++)
1996 ASET (gstring_work_headers, i, Fmake_vector (make_number (i + 2), Qnil));
1997 staticpro (&gstring_work);
1998 gstring_work = Fmake_vector (make_number (10), Qnil);
1999
ca4c9455
KH
2000 /* Text property `composition' should be nonsticky by default. */
2001 Vtext_property_default_nonsticky
2002 = Fcons (Fcons (Qcomposition, Qt), Vtext_property_default_nonsticky);
2003
29208e82 2004 DEFVAR_LISP ("compose-chars-after-function", Vcompose_chars_after_function,
335c5470
PJ
2005 doc: /* Function to adjust composition of buffer text.
2006
9d751859
EZ
2007This function is called with three arguments: FROM, TO, and OBJECT.
2008FROM and TO specify the range of text whose composition should be
335c5470
PJ
2009adjusted. OBJECT, if non-nil, is a string that contains the text.
2010
2011This function is called after a text with `composition' property is
2012inserted or deleted to keep `composition' property of buffer text
2013valid.
2014
2015The default value is the function `compose-chars-after'. */);
d67b4f80 2016 Vcompose_chars_after_function = intern_c_string ("compose-chars-after");
ca4c9455 2017
cd3520a4
JB
2018 DEFSYM (Qauto_composed, "auto-composed");
2019 DEFSYM (Qauto_composition_function, "auto-composition-function");
40add26d 2020
29208e82 2021 DEFVAR_LISP ("auto-composition-mode", Vauto_composition_mode,
d9a7c140
KH
2022 doc: /* Non-nil if Auto-Composition mode is enabled.
2023Use the command `auto-composition-mode' to change this variable. */);
2024 Vauto_composition_mode = Qt;
2025
29208e82 2026 DEFVAR_LISP ("auto-composition-function", Vauto_composition_function,
f96ba4c1 2027 doc: /* Function to call to compose characters automatically.
9d751859 2028This function is called from the display routine with four arguments:
ddc872ba 2029FROM, TO, WINDOW, and STRING.
335c5470 2030
ddc872ba
KH
2031If STRING is nil, the function must compose characters in the region
2032between FROM and TO in the current buffer.
335c5470 2033
ddc872ba
KH
2034Otherwise, STRING is a string, and FROM and TO are indices into the
2035string. In this case, the function must compose characters in the
2036string. */);
f96ba4c1 2037 Vauto_composition_function = Qnil;
40add26d 2038
29208e82 2039 DEFVAR_LISP ("composition-function-table", Vcomposition_function_table,
9d751859 2040 doc: /* Char-table of functions for automatic character composition.
58753d74
KH
2041For each character that has to be composed automatically with
2042preceding and/or following characters, this char-table contains
2043a function to call to compose that character.
2044
2045The element at index C in the table, if non-nil, is a list of
10f72a37 2046composition rules of this form: ([PATTERN PREV-CHARS FUNC] ...)
58753d74 2047
9d751859 2048PATTERN is a regular expression which C and the surrounding
58753d74
KH
2049characters must match.
2050
895416e3
KH
2051PREV-CHARS is a non-negative integer (less than 4) specifying how many
2052characters before C to check the matching with PATTERN. If it is 0,
2053PATTERN must match C and the following characters. If it is 1,
2054PATTERN must match a character before C and the following characters.
58753d74
KH
2055
2056If PREV-CHARS is 0, PATTERN can be nil, which means that the
2057single character C should be composed.
2058
2059FUNC is a function to return a glyph-string representing a
9d751859 2060composition of the characters that match PATTERN. It is
58753d74
KH
2061called with one argument GSTRING.
2062
2063GSTRING is a template of a glyph-string to return. It is already
2064filled with a proper header for the characters to compose, and
2065glyphs corresponding to those characters one by one. The
9d751859 2066function must return a new glyph-string with the same header as
58753d74
KH
2067GSTRING, or modify GSTRING itself and return it.
2068
2069See also the documentation of `auto-composition-mode'. */);
2070 Vcomposition_function_table = Fmake_char_table (Qnil, Qnil);
2071
ca4c9455
KH
2072 defsubr (&Scompose_region_internal);
2073 defsubr (&Scompose_string_internal);
2074 defsubr (&Sfind_composition_internal);
58753d74 2075 defsubr (&Scomposition_get_gstring);
ca4c9455 2076}