| 1 | ;;; composite.el --- support character composition |
| 2 | |
| 3 | ;; Copyright (C) 1999 Electrotechnical Laboratory, JAPAN. |
| 4 | ;; Licensed to the Free Software Foundation. |
| 5 | |
| 6 | ;; Keywords: mule, multilingual, character composition |
| 7 | |
| 8 | ;; This file is part of GNU Emacs. |
| 9 | |
| 10 | ;; GNU Emacs is free software; you can redistribute it and/or modify |
| 11 | ;; it under the terms of the GNU General Public License as published by |
| 12 | ;; the Free Software Foundation; either version 2, or (at your option) |
| 13 | ;; any later version. |
| 14 | |
| 15 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 16 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ;; GNU General Public License for more details. |
| 19 | |
| 20 | ;; You should have received a copy of the GNU General Public License |
| 21 | ;; along with GNU Emacs; see the file COPYING. If not, write to the |
| 22 | ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
| 23 | ;; Boston, MA 02111-1307, USA. |
| 24 | |
| 25 | ;;; Commentary: |
| 26 | |
| 27 | ;;; Code: |
| 28 | |
| 29 | ;;;###autoload |
| 30 | (defconst reference-point-alist |
| 31 | '((tl . 0) (tc . 1) (tr . 2) |
| 32 | (Bl . 3) (Bc . 4) (Br . 5) |
| 33 | (bl . 6) (bc . 7) (br . 8) |
| 34 | (cl . 9) (cc . 10) (cr . 11) |
| 35 | (top-left . 0) (top-center . 1) (top-right . 2) |
| 36 | (base-left . 3) (base-center . 4) (base-right . 5) |
| 37 | (bottom-left . 6) (bottom-center . 7) (bottom-right . 8) |
| 38 | (center-left . 9) (center-center . 10) (center-right . 11) |
| 39 | ;; For backward compatibility... |
| 40 | (ml . 3) (mc . 10) (mr . 5) |
| 41 | (mid-left . 3) (mid-center . 10) (mid-right . 5)) |
| 42 | "Alist of symbols vs integer codes of glyph reference points. |
| 43 | A glyph reference point symbol is to be used to specify a composition |
| 44 | rule in COMPONENTS argument to such functions as `compose-region' and |
| 45 | `make-composition'. |
| 46 | |
| 47 | Meanings of glyph reference point codes are as follows: |
| 48 | |
| 49 | 0----1----2 <---- ascent 0:tl or top-left |
| 50 | | | 1:tc or top-center |
| 51 | | | 2:tr or top-right |
| 52 | | | 3:Bl or base-left 9:cl or center-left |
| 53 | 9 10 11 <---- center 4:Bc or base-center 10:cc or center-center |
| 54 | | | 5:Br or base-right 11:cr or center-right |
| 55 | --3----4----5-- <-- baseline 6:bl or bottom-left |
| 56 | | | 7:bc or bottom-center |
| 57 | 6----7----8 <---- descent 8:br or bottom-right |
| 58 | |
| 59 | Glyph reference point symbols are to be used to specify composition |
| 60 | rule of the form \(GLOBAL-REF-POINT . NEW-REF-POINT), where |
| 61 | GLOBAL-REF-POINT is a reference point in the overall glyphs already |
| 62 | composed, and NEW-REF-POINT is a reference point in the new glyph to |
| 63 | be added. |
| 64 | |
| 65 | For instance, if GLOBAL-REF-POINT is `br' (bottom-right) and |
| 66 | NEW-REF-POINT is `tc' (top-center), the overall glyph is updated as |
| 67 | follows (the point `*' corresponds to both reference points): |
| 68 | |
| 69 | +-------+--+ <--- new ascent |
| 70 | | | | |
| 71 | | global| | |
| 72 | | glyph | | |
| 73 | -- | | |-- <--- baseline \(doesn't change) |
| 74 | +----+--*--+ |
| 75 | | | new | |
| 76 | | |glyph| |
| 77 | +----+-----+ <--- new descent |
| 78 | ") |
| 79 | |
| 80 | ;; Encode composition rule RULE into an integer value. RULE is a cons |
| 81 | ;; of global and new reference point symbols. |
| 82 | ;; This must be compatible with C macro COMPOSITION_ENCODE_RULE |
| 83 | ;; defined in composite.h. |
| 84 | |
| 85 | (defun encode-composition-rule (rule) |
| 86 | (if (and (integerp rule) (< rule 144)) |
| 87 | ;; Already encoded. |
| 88 | rule |
| 89 | (or (consp rule) |
| 90 | (error "Invalid composition rule: %S" rule)) |
| 91 | (let ((gref (car rule)) |
| 92 | (nref (cdr rule))) |
| 93 | (or (integerp gref) |
| 94 | (setq gref (cdr (assq gref reference-point-alist)))) |
| 95 | (or (integerp nref) |
| 96 | (setq nref (cdr (assq nref reference-point-alist)))) |
| 97 | (or (and (>= gref 0) (< gref 12) (>= nref 0) (< nref 12)) |
| 98 | (error "Invalid composition rule: %S" rule)) |
| 99 | (+ (* gref 12) nref)))) |
| 100 | |
| 101 | ;; Decode encoded composition rule RULE-CODE. The value is a cons of |
| 102 | ;; global and new reference point symbols. |
| 103 | ;; This must be compatible with C macro COMPOSITION_DECODE_RULE |
| 104 | ;; defined in composite.h. |
| 105 | |
| 106 | (defun decode-composition-rule (rule-code) |
| 107 | (or (and (natnump rule-code) (< rule-code 144)) |
| 108 | (error "Invalid encoded composition rule: %S" rule-code)) |
| 109 | (let ((gref (car (rassq (/ rule-code 12) reference-point-alist))) |
| 110 | (nref (car (rassq (% rule-code 12) reference-point-alist)))) |
| 111 | (or (and gref (symbolp gref) nref (symbolp nref)) |
| 112 | (error "Invalid composition rule code: %S" rule-code)) |
| 113 | (cons gref nref))) |
| 114 | |
| 115 | ;; Encode composition rules in composition components COMPONENTS. The |
| 116 | ;; value is a copy of COMPONENTS, where composition rules (cons of |
| 117 | ;; global and new glyph reference point symbols) are replaced with |
| 118 | ;; encoded composition rules. Optional 2nd argument NOCOPY non-nil |
| 119 | ;; means don't make a copy but modify COMPONENTS directly. |
| 120 | |
| 121 | (defun encode-composition-components (components &optional nocopy) |
| 122 | (or nocopy |
| 123 | (setq components (copy-sequence components))) |
| 124 | (if (vectorp components) |
| 125 | (let ((len (length components)) |
| 126 | (i 1)) |
| 127 | (while (< i len) |
| 128 | (aset components i |
| 129 | (encode-composition-rule (aref components i))) |
| 130 | (setq i (+ i 2)))) |
| 131 | (let ((tail (cdr components))) |
| 132 | (while tail |
| 133 | (setcar tail |
| 134 | (encode-composition-rule (car tail))) |
| 135 | (setq tail (nthcdr 2 tail))))) |
| 136 | components) |
| 137 | |
| 138 | ;; Decode composition rule codes in composition components COMPONENTS. |
| 139 | ;; The value is a copy of COMPONENTS, where composition rule codes are |
| 140 | ;; replaced with composition rules (cons of global and new glyph |
| 141 | ;; reference point symbols). Optional 2nd argument NOCOPY non-nil |
| 142 | ;; means don't make a copy but modify COMPONENTS directly. |
| 143 | ;; It is assumed that COMPONENTS is a vector and is for rule-base |
| 144 | ;; composition, thus (2N+1)th elements are rule codes. |
| 145 | |
| 146 | (defun decode-composition-components (components &optional nocopy) |
| 147 | (or nocopy |
| 148 | (setq components (copy-sequence components))) |
| 149 | (let ((len (length components)) |
| 150 | (i 1)) |
| 151 | (while (< i len) |
| 152 | (aset components i |
| 153 | (decode-composition-rule (aref components i))) |
| 154 | (setq i (+ i 2)))) |
| 155 | components) |
| 156 | |
| 157 | ;;;###autoload |
| 158 | (defun compose-region (start end &optional components modification-func) |
| 159 | "Compose characters in the current region. |
| 160 | |
| 161 | When called from a program, expects these four arguments. |
| 162 | |
| 163 | First two arguments START and END are positions (integers or markers) |
| 164 | specifying the region. |
| 165 | |
| 166 | Optional 3rd argument COMPONENTS, if non-nil, is a character or a |
| 167 | sequence (vector, list, or string) of integers. |
| 168 | |
| 169 | If it is a character, it is an alternate character to display instead |
| 170 | of the text in the region. |
| 171 | |
| 172 | If it is a string, the elements are alternate characters. |
| 173 | |
| 174 | If it is a vector or list, it is a sequence of alternate characters and |
| 175 | composition rules, where (2N)th elements are characters and (2N+1)th |
| 176 | elements are composition rules to specify how to compose (2N+2)th |
| 177 | elements with previously composed N glyphs. |
| 178 | |
| 179 | A composition rule is a cons of global and new glyph reference point |
| 180 | symbols. See the documentation of `reference-point-alist' for more |
| 181 | detail. |
| 182 | |
| 183 | Optional 4th argument MODIFICATION-FUNC is a function to call to |
| 184 | adjust the composition when it gets invalid because of a change of |
| 185 | text in the composition." |
| 186 | (interactive "r") |
| 187 | (let ((modified-p (buffer-modified-p)) |
| 188 | (buffer-read-only nil)) |
| 189 | (if (or (vectorp components) (listp components)) |
| 190 | (setq components (encode-composition-components components))) |
| 191 | (compose-region-internal start end components modification-func) |
| 192 | (set-buffer-modified-p modified-p))) |
| 193 | |
| 194 | ;;;###autoload |
| 195 | (defun decompose-region (start end) |
| 196 | "Decompose text in the current region. |
| 197 | |
| 198 | When called from a program, expects two arguments, |
| 199 | positions (integers or markers) specifying the region." |
| 200 | (interactive "r") |
| 201 | (let ((modified-p (buffer-modified-p)) |
| 202 | (buffer-read-only nil)) |
| 203 | (remove-text-properties start end '(composition nil)) |
| 204 | (set-buffer-modified-p modified-p))) |
| 205 | |
| 206 | ;;;###autoload |
| 207 | (defun compose-string (string &optional start end components modification-func) |
| 208 | "Compose characters in string STRING. |
| 209 | |
| 210 | The return value is STRING where `composition' property is put on all |
| 211 | the characters in it. |
| 212 | |
| 213 | Optional 2nd and 3rd arguments START and END specify the range of |
| 214 | STRING to be composed. They defaults to the beginning and the end of |
| 215 | STRING respectively. |
| 216 | |
| 217 | Optional 4th argument COMPONENTS, if non-nil, is a character or a |
| 218 | sequence (vector, list, or string) of integers. See the function |
| 219 | `compose-region' for more detail. |
| 220 | |
| 221 | Optional 5th argument MODIFICATION-FUNC is a function to call to |
| 222 | adjust the composition when it gets invalid because of a change of |
| 223 | text in the composition." |
| 224 | (if (or (vectorp components) (listp components)) |
| 225 | (setq components (encode-composition-components components))) |
| 226 | (or start (setq start 0)) |
| 227 | (or end (setq end (length string))) |
| 228 | (compose-string-internal string start end components modification-func) |
| 229 | string) |
| 230 | |
| 231 | ;;;###autoload |
| 232 | (defun decompose-string (string) |
| 233 | "Return STRING where `composition' property is removed." |
| 234 | (remove-text-properties 0 (length string) '(composition nil) string) |
| 235 | string) |
| 236 | |
| 237 | ;;;###autoload |
| 238 | (defun compose-chars (&rest args) |
| 239 | "Return a string from arguments in which all characters are composed. |
| 240 | For relative composition, arguments are characters. |
| 241 | For rule-based composition, Mth \(where M is odd) arguments are |
| 242 | characters, and Nth \(where N is even) arguments are composition rules. |
| 243 | A composition rule is a cons of glyph reference points of the form |
| 244 | \(GLOBAL-REF-POINT . NEW-REF-POINT). See the documentation of |
| 245 | `reference-point-alist' for more detail." |
| 246 | (let (str components) |
| 247 | (if (consp (car (cdr args))) |
| 248 | ;; Rule-base composition. |
| 249 | (let ((len (length args)) |
| 250 | (tail (encode-composition-components args 'nocopy))) |
| 251 | |
| 252 | (while tail |
| 253 | (setq str (cons (car tail) str)) |
| 254 | (setq tail (nthcdr 2 tail))) |
| 255 | (setq str (concat (nreverse str)) |
| 256 | components args)) |
| 257 | ;; Relative composition. |
| 258 | (setq str (concat args))) |
| 259 | (compose-string-internal str 0 (length str) components))) |
| 260 | |
| 261 | ;;;###autoload |
| 262 | (defun find-composition (pos &optional limit string detail-p) |
| 263 | "Return information about a composition at or nearest to buffer position POS. |
| 264 | |
| 265 | If the character at POS has `composition' property, the value is a list |
| 266 | of FROM, TO, and VALID-P. |
| 267 | |
| 268 | FROM and TO specify the range of text that has the same `composition' |
| 269 | property, VALID-P is non-nil if and only if this composition is valid. |
| 270 | |
| 271 | If there's no composition at POS, and the optional 2nd argument LIMIT |
| 272 | is non-nil, search for a composition toward LIMIT. |
| 273 | |
| 274 | If no composition is found, return nil. |
| 275 | |
| 276 | Optional 3rd argument STRING, if non-nil, is a string to look for a |
| 277 | composition in; nil means the current buffer. |
| 278 | |
| 279 | If a valid composition is found and the optional 4th argument DETAIL-P |
| 280 | is non-nil, the return value is a list of FROM, TO, COMPONENTS, |
| 281 | RELATIVE-P, MOD-FUNC, and WIDTH. |
| 282 | |
| 283 | COMPONENTS is a vector of integers, the meaning depends on RELATIVE-P. |
| 284 | |
| 285 | RELATIVE-P is t if the composition method is relative, else nil. |
| 286 | |
| 287 | If RELATIVE-P is t, COMPONENTS is a vector of characters to be |
| 288 | composed. If RELATIVE-P is nil, COMPONENTS is a vector of characters |
| 289 | and composition rules as described in `compose-region'. |
| 290 | |
| 291 | MOD-FUNC is a modification function of the composition. |
| 292 | |
| 293 | WIDTH is a number of columns the composition occupies on the screen." |
| 294 | (let ((result (find-composition-internal pos limit string detail-p))) |
| 295 | (if (and detail-p result (nth 2 result) (not (nth 3 result))) |
| 296 | ;; This is a valid rule-base composition. |
| 297 | (decode-composition-components (nth 2 result) 'nocopy)) |
| 298 | result)) |
| 299 | |
| 300 | \f |
| 301 | ;;;###autoload |
| 302 | (defun compose-chars-after (pos &optional limit object) |
| 303 | "Compose characters in current buffer after position POS. |
| 304 | |
| 305 | It looks up the char-table `composition-function-table' (which see) by |
| 306 | a character after POS. If non-nil value is found, the format of the |
| 307 | value should be an alist of PATTERNs vs FUNCs, where PATTERNs are |
| 308 | regular expressions and FUNCs are functions. If the text after POS |
| 309 | matches one of PATTERNs, call the corresponding FUNC with three |
| 310 | arguments POS, TO, and PATTERN, where TO is the end position of text |
| 311 | matching PATTERN, and return what FUNC returns. Otherwise, return |
| 312 | nil. |
| 313 | |
| 314 | FUNC is responsible for composing the text properly. The return value |
| 315 | is: |
| 316 | nil -- if no characters were composed. |
| 317 | CHARS (integer) -- if CHARS characters were composed. |
| 318 | |
| 319 | Optional 2nd arg LIMIT, if non-nil, limits the matching of text. |
| 320 | |
| 321 | Optional 3rd arg OBJECT, if non-nil, is a string that contains the |
| 322 | text to compose. In that case, POS and LIMIT index to the string. |
| 323 | |
| 324 | This function is the default value of `compose-chars-after-function'." |
| 325 | (let ((tail (aref composition-function-table (char-after pos))) |
| 326 | pattern func result) |
| 327 | (when tail |
| 328 | (save-match-data |
| 329 | (save-excursion |
| 330 | (while (and tail (not func)) |
| 331 | (setq pattern (car (car tail)) |
| 332 | func (cdr (car tail))) |
| 333 | (goto-char pos) |
| 334 | (if (if limit |
| 335 | (and (re-search-forward pattern limit t) |
| 336 | (= (match-beginning 0) pos)) |
| 337 | (looking-at pattern)) |
| 338 | (setq result (funcall func pos (match-end 0) pattern nil)) |
| 339 | (setq func nil tail (cdr tail))))))) |
| 340 | result)) |
| 341 | |
| 342 | ;;;###autoload |
| 343 | (defun compose-last-chars (args) |
| 344 | "Compose last characters. |
| 345 | The argument is a parameterized event of the form |
| 346 | \(compose-last-chars N COMPONENTS), |
| 347 | where N is the number of characters before point to compose, |
| 348 | COMPONENTS, if non-nil, is the same as the argument to `compose-region' |
| 349 | \(which see). If it is nil, `compose-chars-after' is called, |
| 350 | and that function find a proper rule to compose the target characters. |
| 351 | This function is intended to be used from input methods. |
| 352 | The global keymap binds special event `compose-last-chars' to this |
| 353 | function. Input method may generate an event (compose-last-chars N COMPONENTS) |
| 354 | after a sequence character events." |
| 355 | (interactive "e") |
| 356 | (let ((chars (nth 1 args))) |
| 357 | (if (and (numberp chars) |
| 358 | (>= (- (point) (point-min)) chars)) |
| 359 | (if (nth 2 args) |
| 360 | (compose-region (- (point) chars) (point) (nth 2 args)) |
| 361 | (compose-chars-after (- (point) chars) (point)))))) |
| 362 | |
| 363 | ;;;###autoload(global-set-key [compose-last-chars] 'compose-last-chars) |
| 364 | |
| 365 | \f |
| 366 | ;;; The following codes are only for backward compatibility with Emacs |
| 367 | ;;; 20.4 and the earlier. |
| 368 | |
| 369 | ;;;###autoload |
| 370 | (defun decompose-composite-char (char &optional type with-composition-rule) |
| 371 | "Convert CHAR to string. |
| 372 | This is only for backward compatibility with Emacs 20.4 and the earlier. |
| 373 | |
| 374 | If optional 2nd arg TYPE is non-nil, it is `string', `list', or |
| 375 | `vector'. In this case, CHAR is converted string, list of CHAR, or |
| 376 | vector of CHAR respectively." |
| 377 | (cond ((or (null type) (eq type 'string)) (char-to-string char)) |
| 378 | ((eq type 'list) (list char)) |
| 379 | (t (vector char)))) |
| 380 | |
| 381 | (make-obsolete 'decompose-composite-char 'char-to-string "21.1") |
| 382 | |
| 383 | \f |
| 384 | ;;; composite.el ends here |