Commit | Line | Data |
---|---|---|
60370d40 | 1 | ;;; composite.el --- support character composition |
c674f351 KH |
2 | |
3 | ;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. | |
4 | ;; Licensed to the Free Software Foundation. | |
5 | ||
6 | ;; Keywords: mule, multilingual, character composition | |
7 | ||
8 | ;; This file is part of GNU Emacs. | |
9 | ||
10 | ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 | ;; it under the terms of the GNU General Public License as published by | |
12 | ;; the Free Software Foundation; either version 2, or (at your option) | |
13 | ;; any later version. | |
14 | ||
15 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | ;; GNU General Public License for more details. | |
19 | ||
20 | ;; You should have received a copy of the GNU General Public License | |
21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
23 | ;; Boston, MA 02111-1307, USA. | |
24 | ||
60370d40 PJ |
25 | ;;; Commentary: |
26 | ||
c674f351 KH |
27 | ;;; Code: |
28 | ||
c674f351 KH |
(defconst reference-point-alist
  ;; The order of entries matters: reverse lookups done with `rassq'
  ;; return the first entry carrying a given code, so the short names
  ;; must stay ahead of the long and backward-compatible ones.
  '((tl . 0) (tc . 1) (tr . 2)
    (Bl . 3) (Bc . 4) (Br . 5)
    (bl . 6) (bc . 7) (br . 8)
    (cl . 9) (cc . 10) (cr . 11)
    (top-left . 0) (top-center . 1) (top-right . 2)
    (base-left . 3) (base-center . 4) (base-right . 5)
    (bottom-left . 6) (bottom-center . 7) (bottom-right . 8)
    (center-left . 9) (center-center . 10) (center-right . 11)
    ;; For backward compatibility...
    (ml . 3) (mc . 10) (mr . 5)
    (mid-left . 3) (mid-center . 10) (mid-right . 5))
  "Alist of symbols vs integer codes of glyph reference points.
A glyph reference point symbol is to be used to specify a composition
rule in COMPONENTS argument to such functions as `compose-region' and
`make-composition'.

Meanings of glyph reference point codes are as follows:

    0----1----2 <---- ascent    0:tl or top-left
    |         |                 1:tc or top-center
    |         |                 2:tr or top-right
    |         |                 3:Bl or base-left     9:cl or center-left
    9   10   11 <---- center    4:Bc or base-center  10:cc or center-center
    |         |                 5:Br or base-right   11:cr or center-right
  --3----4----5-- <-- baseline  6:bl or bottom-left
    |         |                 7:bc or bottom-center
    6----7----8 <---- descent   8:br or bottom-right

Glyph reference point symbols are to be used to specify composition
rule of the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where
GLOBAL-REF-POINT is a reference point in the overall glyphs already
composed, and NEW-REF-POINT is a reference point in the new glyph to
be added.

For instance, if GLOBAL-REF-POINT is `br' (bottom-right) and
NEW-REF-POINT is `tc' (top-center), the overall glyph is updated as
follows (the point `*' corresponds to both reference points):

    +-------+--+ <--- new ascent
    |       |  |
    | global|  |
    | glyph |  |
 -- |       |  |-- <--- baseline \(doesn't change)
    +----+--*--+
    |    | new |
    |    |glyph|
    +----+-----+ <--- new descent
")
78 | ||
;; Encode composition rule RULE into an integer value.  RULE is a cons
;; of global and new reference points, each given either as a symbol
;; listed in `reference-point-alist' or directly as an integer code in
;; the range 0..11.  An integer RULE in the range 0..143 is taken to
;; be already encoded and is returned unchanged.  Anything else
;; signals an "Invalid composition rule" error.
;; This must be compatible with C macro COMPOSITION_ENCODE_RULE
;; defined in composite.h.

(defun encode-composition-rule (rule)
  (if (and (natnump rule) (< rule 144))
      ;; Already encoded.
      rule
    (or (consp rule)
        (error "Invalid composition rule: %S" rule))
    (let ((gref (car rule))
          (nref (cdr rule)))
      (or (integerp gref)
          (setq gref (cdr (assq gref reference-point-alist))))
      (or (integerp nref)
          (setq nref (cdr (assq nref reference-point-alist))))
      ;; A symbol missing from the alist leaves nil here.  Test
      ;; `integerp' before comparing so that such a rule is reported
      ;; as invalid instead of failing inside `>=' with a
      ;; wrong-type-argument error.
      (or (and (integerp gref) (>= gref 0) (< gref 12)
               (integerp nref) (>= nref 0) (< nref 12))
          (error "Invalid composition rule: %S" rule))
      (+ (* gref 12) nref))))
99 | ||
;; Turn the encoded composition rule RULE-CODE back into a cons of
;; global and new reference point symbols.  RULE-CODE must be an
;; integer in the range 0..143: its quotient by 12 is the code of the
;; global reference point and its remainder the code of the new one.
;; This must be compatible with C macro COMPOSITION_DECODE_RULE
;; defined in composite.h.

(defun decode-composition-rule (rule-code)
  (unless (and (natnump rule-code) (< rule-code 144))
    (error "Invalid encoded composition rule: %S" rule-code))
  (let* ((global-code (/ rule-code 12))
         (new-code (% rule-code 12))
         ;; `rassq' yields the first alist entry carrying the code.
         (global-sym (car (rassq global-code reference-point-alist)))
         (new-sym (car (rassq new-code reference-point-alist))))
    (unless (and global-sym (symbolp global-sym)
                 new-sym (symbolp new-sym))
      (error "Invalid composition rule code: %S" rule-code))
    (cons global-sym new-sym)))
113 | ||
;; Return composition components COMPONENTS with every composition
;; rule (a cons of global and new glyph reference point symbols)
;; replaced by its encoded integer form.  The rules are the elements
;; at odd positions (1, 3, 5, ...).  A copy of COMPONENTS is encoded
;; and returned unless the optional 2nd argument NOCOPY is non-nil, in
;; which case COMPONENTS itself is modified.

(defun encode-composition-components (components &optional nocopy)
  (unless nocopy
    (setq components (copy-sequence components)))
  (cond ((vectorp components)
         (let ((size (length components))
               (idx 1))
           (while (< idx size)
             (aset components idx
                   (encode-composition-rule (aref components idx)))
             (setq idx (+ idx 2)))))
        (t
         ;; Not a vector: walk it as a list, two cells at a time,
         ;; starting from the second element.
         (let ((cell (cdr components)))
           (while cell
             (setcar cell (encode-composition-rule (car cell)))
             (setq cell (nthcdr 2 cell))))))
  components)
136 | ||
;; Return composition components COMPONENTS with every composition
;; rule code replaced by the corresponding composition rule (a cons
;; of global and new glyph reference point symbols).  A copy of
;; COMPONENTS is decoded and returned unless the optional 2nd argument
;; NOCOPY is non-nil, in which case COMPONENTS itself is modified.
;; It is assumed that COMPONENTS is a vector and is for rule-base
;; composition, thus (2N+1)th elements are rule codes.

(defun decode-composition-components (components &optional nocopy)
  (let* ((vec (if nocopy components (copy-sequence components)))
         (size (length vec))
         (idx 1))
    (while (< idx size)
      (aset vec idx (decode-composition-rule (aref vec idx)))
      (setq idx (+ idx 2)))
    vec))
155 | ||
c674f351 KH |
(defun compose-region (start end &optional components modification-func)
  "Compose characters in the current region.

Characters are composed relatively, i.e. composed by overstriking or
stacking depending on ascent, descent and other properties.

When called from a program, expects these four arguments.

First two arguments START and END are positions (integers or markers)
specifying the region.

Optional 3rd argument COMPONENTS, if non-nil, is a character or a
sequence (vector, list, or string) of integers.  In this case,
characters are composed not relatively but according to COMPONENTS.

If it is a character, it is an alternate character to display instead
of the text in the region.

If it is a string, the elements are alternate characters.

If it is a vector or list, it is a sequence of alternate characters and
composition rules, where (2N)th elements are characters and (2N+1)th
elements are composition rules to specify how to compose (2N+2)th
elements with previously composed N glyphs.

A composition rule is a cons of global and new glyph reference point
symbols.  See the documentation of `reference-point-alist' for more
detail.

Optional 4th argument MODIFICATION-FUNC is a function to call to
adjust the composition when it gets invalid because of a change of
text in the composition."
  (interactive "r")
  (let ((buffer-read-only nil)
        (was-modified (buffer-modified-p))
        ;; Vectors and lists may hold symbolic rules; hand the
        ;; primitive a copy with the rules encoded.
        (encoded (if (or (vectorp components) (listp components))
                     (encode-composition-components components)
                   components)))
    (compose-region-internal start end encoded modification-func)
    ;; Composing must not mark the buffer as modified.
    (set-buffer-modified-p was-modified)))
195 | ||
c674f351 KH |
(defun decompose-region (start end)
  "Decompose text in the current region.

When called from a program, expects two arguments,
positions (integers or markers) specifying the region."
  (interactive "r")
  (let ((buffer-read-only nil)
        (was-modified (buffer-modified-p)))
    (remove-text-properties start end (list 'composition nil))
    ;; Dropping the property must not mark the buffer as modified.
    (set-buffer-modified-p was-modified)))
206 | ||
c674f351 KH |
(defun compose-string (string &optional start end components modification-func)
  "Compose characters in string STRING.

The return value is STRING with the `composition' property put on all
the characters in it.

Optional 2nd and 3rd arguments START and END specify the range of
STRING to be composed.  They default to the beginning and the end of
STRING respectively.

Optional 4th argument COMPONENTS, if non-nil, is a character or a
sequence (vector, list, or string) of integers.  See the function
`compose-region' for more detail.

Optional 5th argument MODIFICATION-FUNC is a function to call to
adjust the composition when it gets invalid because of a change of
text in the composition."
  (when (or (vectorp components) (listp components))
    (setq components (encode-composition-components components)))
  (compose-string-internal string
                           (or start 0)
                           (or end (length string))
                           components
                           modification-func)
  string)
230 | ||
c674f351 KH |
(defun decompose-string (string)
  "Remove the `composition' property from all of STRING and return STRING.
STRING itself is modified; no copy is made."
  (prog1 string
    (remove-text-properties 0 (length string) '(composition nil) string)))
235 | ||
c674f351 KH |
(defun compose-chars (&rest args)
  "Return a string from arguments in which all characters are composed.
For relative composition, arguments are characters.
For rule-based composition, Mth \(where M is odd) arguments are
characters, and Nth \(where N is even) arguments are composition rules.
A composition rule is a cons of glyph reference points of the form
\(GLOBAL-REF-POINT . NEW-REF-POINT).  See the documentation of
`reference-point-alist' for more detail."
  (let (str components)
    ;; A cons in the second position is what marks the call as
    ;; rule-based.
    (if (consp (car (cdr args)))
        ;; Rule-base composition.  ARGS is encoded in place and then
        ;; doubles as the COMPONENTS passed to the primitive.
        (let ((tail (encode-composition-components args 'nocopy)))
          ;; Collect the characters, i.e. every other element.
          (while tail
            (setq str (cons (car tail) str))
            (setq tail (nthcdr 2 tail)))
          (setq str (concat (nreverse str))
                components args))
      ;; Relative composition.
      (setq str (concat args)))
    (compose-string-internal str 0 (length str) components)))
258 | ||
c674f351 KH |
(defun find-composition (pos &optional limit string detail-p)
  "Return information about a composition at or nearest to buffer position POS.

If the character at POS has `composition' property, the value is a list
of FROM, TO, and VALID-P.

FROM and TO specify the range of text that has the same `composition'
property, VALID-P is non-nil if and only if this composition is valid.

If there's no composition at POS, and the optional 2nd argument LIMIT
is non-nil, search for a composition toward LIMIT.

If no composition is found, return nil.

Optional 3rd argument STRING, if non-nil, is a string to look for a
composition in; nil means the current buffer.

If a valid composition is found and the optional 4th argument DETAIL-P
is non-nil, the return value is a list of FROM, TO, COMPONENTS,
RELATIVE-P, MOD-FUNC, and WIDTH.

COMPONENTS is a vector of integers, the meaning depends on RELATIVE-P.

RELATIVE-P is t if the composition method is relative, else nil.

If RELATIVE-P is t, COMPONENTS is a vector of characters to be
composed.  If RELATIVE-P is nil, COMPONENTS is a vector of characters
and composition rules as described in `compose-region'.

MOD-FUNC is a modification function of the composition.

WIDTH is a number of columns the composition occupies on the screen."
  (let ((info (find-composition-internal pos limit string detail-p)))
    (when (and detail-p info (nth 2 info) (not (nth 3 info)))
      ;; This is a valid rule-base composition: decode the rule codes
      ;; in place so that the caller sees symbolic rules.
      (decode-composition-components (nth 2 info) 'nocopy))
    info))
296 | ||
297 | \f | |
(defun compose-chars-after (pos &optional limit object)
  "Compose characters in current buffer after position POS.

It looks up the char-table `composition-function-table' (which see) by
a character after POS.  If non-nil value is found, the format of the
value should be an alist of PATTERNs vs FUNCs, where PATTERNs are
regular expressions and FUNCs are functions.  If the text after POS
matches one of PATTERNs, call the corresponding FUNC with four
arguments POS, TO, PATTERN, and nil, where TO is the end position of
text matching PATTERN, and return what FUNC returns.  Otherwise,
return nil.  If there is no character after POS, return nil.

FUNC is responsible for composing the text properly.  The return value
is:
  nil -- if no characters were composed.
  CHARS (integer) -- if CHARS characters were composed.

Optional 2nd arg LIMIT, if non-nil, limits the matching of text.

Optional 3rd arg OBJECT, if non-nil, is a string that contains the
text to compose.  In that case, POS and LIMIT index to the string.

This function is the default value of `compose-chars-after-function'."
  ;; NOTE(review): OBJECT is accepted but never consulted below; the
  ;; lookup and the matching always use the current buffer, and FUNC
  ;; always receives nil as its last argument.
  (let* ((ch (char-after pos))
         ;; `char-after' returns nil when there is no character at POS
         ;; (e.g. POS is the end of the buffer); there is then nothing
         ;; to look up, and indexing the char-table with nil would
         ;; signal an error.
         (tail (and ch (aref composition-function-table ch)))
         pattern func result)
    (when tail
      (save-match-data
        (save-excursion
          ;; Try the (PATTERN . FUNC) pairs in order; FUNC stays
          ;; non-nil, ending the loop, once a pattern has matched.
          (while (and tail (not func))
            (setq pattern (car (car tail))
                  func (cdr (car tail)))
            (goto-char pos)
            (if (if limit
                    ;; The match must start exactly at POS.
                    (and (re-search-forward pattern limit t)
                         (= (match-beginning 0) pos))
                  (looking-at pattern))
                (setq result (funcall func pos (match-end 0) pattern nil))
              (setq func nil tail (cdr tail)))))))
    result))
337 | ||
c674f351 KH |
(defun compose-last-chars (args)
  "Compose last characters.
The argument is a parameterized event of the form
	\(compose-last-chars N COMPONENTS),
where N is the number of characters before point to compose,
COMPONENTS, if non-nil, is the same as the argument to `compose-region'
\(which see).  If it is nil, `compose-chars-after' is called,
and that function finds a proper rule to compose the target characters.
This function is intended to be used from input methods.
The global keymap binds special event `compose-last-chars' to this
function.  Input method may generate an event (compose-last-chars N COMPONENTS)
after a sequence of character events."
  (interactive "e")
  (let ((count (nth 1 args))
        (components (nth 2 args)))
    ;; Do nothing unless N is a number and that many characters are
    ;; available between the start of the accessible region and point.
    (when (and (numberp count)
               (>= (- (point) (point-min)) count))
      (let ((from (- (point) count)))
        (if components
            (compose-region from (point) components)
          (compose-chars-after from (point)))))))

(global-set-key [compose-last-chars] 'compose-last-chars)
c674f351 KH |
359 | |
360 | \f | |
68fbe650 KH |
361 | ;;; Automatic character composition. |
362 | ||
(defvar composition-function-table (make-char-table nil)
  "Char table of functions for automatic character composition.
For each character that has to be composed automatically with
preceding and/or following characters, this char table contains
a function to call to compose that character.

Each function is called with two arguments, POS and STRING.

If STRING is nil, POS is a position in the current buffer, and the
function has to compose a character at POS with surrounding characters
in the current buffer.

Otherwise, STRING is a string, and POS is an index to the string.  In
this case, the function has to compose a character at POS with
surrounding characters in the string.

See also the command `toggle-auto-composition'.")
381 | ||
;; Copied from font-lock.el.
(eval-when-compile
  ;;
  ;; We don't do this at the top-level as we only use non-autoloaded macros.
  (require 'cl)
  ;;
  ;; Borrowed from lazy-lock.el.
  ;; We use this to preserve or protect things when modifying text properties.
  (defmacro save-buffer-state (varlist &rest body)
    "Bind variables according to VARLIST and eval BODY restoring buffer state."
    (let ((bindings
           ;; The caller's bindings come first, followed by the ones
           ;; that shield the buffer state while BODY runs.
           (append varlist
                   '((modified (buffer-modified-p)) (buffer-undo-list t)
                     (inhibit-read-only t) (inhibit-point-motion-hooks t)
                     (inhibit-modification-hooks t)
                     deactivate-mark buffer-file-name buffer-file-truename))))
      `(let* ,bindings
         ,@body
         (unless modified
           (restore-buffer-modified-p nil)))))
  (put 'save-buffer-state 'lisp-indent-function 1)
  ;; Fixme: This makes bootstrapping fail with this error:
  ;;   Symbol's function definition is void: eval-defun
  ;;(def-edebug-spec save-buffer-state let)
  )
68fbe650 KH |
405 | |
(defvar auto-composition-chunk-size 500
  "*Automatic composition uses chunks of this many characters, or smaller.")
408 | ||
(defun auto-compose-chars (pos string)
  "Compose characters after the buffer position POS.
If STRING is non-nil, it is a string, and POS is an index to the string.
In that case, compose characters in the string.

This function is the default value of `auto-composition-function' (which see)."
  (save-buffer-state nil
    (save-excursion
      (save-restriction
        (save-match-data
          (let* ((start pos)
                 (end (if string (length string) (point-max)))
                 (limit (next-single-property-change pos 'auto-composed string
                                                     end))
                 ch func newpos)
            ;; Handle at most `auto-composition-chunk-size' characters
            ;; before starting to look for a stopping point.
            (if (> (- limit start) auto-composition-chunk-size)
                (setq limit (+ start auto-composition-chunk-size)))
            ;; Scan up to LIMIT, then on to the next newline (or END).
            (while (and (< pos end)
                        (setq ch (if string (aref string pos)
                                   (char-after pos)))
                        (or (< pos limit)
                            (/= ch ?\n)))
              (setq func (aref composition-function-table ch))
              ;; `functionp' rather than `fboundp': the latter signals
              ;; an error for table entries that are not symbols, such
              ;; as a lambda expression.
              (if (functionp func)
                  (setq newpos (funcall func pos string)
                        ;; Always advance, even if FUNC returns
                        ;; something that is not a position past POS.
                        pos (if (and (integerp newpos) (> newpos pos))
                                newpos
                              (1+ pos)))
                (setq pos (1+ pos))))
            (if (< pos limit)
                (setq pos (1+ pos)))
            ;; Mark the scanned text as done.
            (put-text-property start pos 'auto-composed t string)))))))

(setq auto-composition-function 'auto-compose-chars)
444 | ||
(defun toggle-auto-composition (&optional arg)
  "Change whether automatic character composition is enabled in this buffer.
With arg, enable it iff arg is positive."
  (interactive "P")
  (let ((turn-on (if arg
                     (> (prefix-numeric-value arg) 0)
                   ;; No argument: flip the current state.
                   (not auto-composition-function))))
    (cond (turn-on
           ;; Fall back to the global value of the variable.
           (kill-local-variable 'auto-composition-function))
          (t
           (make-local-variable 'auto-composition-function)
           (setq auto-composition-function nil)
           ;; Remove the compositions present in the whole buffer.
           (save-buffer-state nil
             (save-restriction
               (widen)
               (decompose-region (point-min) (point-max))))))
    ;; In either case, reset the `auto-composed' property everywhere.
    (save-buffer-state nil
      (save-restriction
        (widen)
        (put-text-property (point-min) (point-max) 'auto-composed nil)))))
c674f351 KH |
464 | \f |
;;; The following code is only for backward compatibility with Emacs
;;; 20.4 and earlier.
c674f351 | 467 | |
c674f351 KH |
(defun decompose-composite-char (char &optional type with-composition-rule)
  "Convert CHAR to string.

If optional 2nd arg TYPE is non-nil, it is `string', `list', or
`vector'.  In this case, CHAR is converted to string, list of CHAR, or
vector of CHAR respectively.
Optional 3rd arg WITH-COMPOSITION-RULE is ignored."
  (if (memq type '(nil string))
      (char-to-string char)
    (if (eq type 'list)
        (list char)
      ;; Any other TYPE is treated like `vector'.
      (vector char))))

(make-obsolete 'decompose-composite-char 'char-to-string "21.1")
480 | ||
c674f351 KH |
481 | \f |
482 | ;;; composite.el ends here |