Update copyright notices for 2013.
[bpt/emacs.git] / lisp / language / thai-util.el
CommitLineData
ef90a979 1;;; thai-util.el --- utilities for Thai -*- coding: utf-8; -*-
4ed46869 2
ab422c4d 3;; Copyright (C) 2000-2013 Free Software Foundation, Inc.
7976eda0 4;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 5;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
8f924df7 6;; National Institute of Advanced Industrial Science and Technology (AIST)
8f9eda28 7;; Registration Number H14PRO021
4ed46869 8
ffdc0bca 9;; Keywords: mule, multilingual, Thai, i18n
4ed46869
KH
10
11;; This file is part of GNU Emacs.
12
4936186e 13;; GNU Emacs is free software: you can redistribute it and/or modify
4ed46869 14;; it under the terms of the GNU General Public License as published by
4936186e
GM
15;; the Free Software Foundation, either version 3 of the License, or
16;; (at your option) any later version.
4ed46869
KH
17
18;; GNU Emacs is distributed in the hope that it will be useful,
19;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21;; GNU General Public License for more details.
22
23;; You should have received a copy of the GNU General Public License
4936186e 24;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
4ed46869 25
60370d40
PJ
26;;; Commentary:
27
4ed46869
KH
28;;; Code:
29
6978c919
JB
30(defvar thai-auto-composition-mode)
31
4ed46869
KH
32;; Setting information of Thai characters.
33
c595d888
KH
(defconst thai-category-table (make-category-table)
  "Category table holding the categories used to compose Thai text.
The categories defined in it are `c', `v', `t', `u', `I' and `U'.")

;; Define each Thai category mnemonic together with its description.
(dolist (category '((?c . "Thai consonant")
                    (?v . "Thai upper/lower vowel")
                    (?t . "Thai tone mark")
                    (?u . "Thai tone mark and upper sign")
                    (?I . "THAI CHARACTER SARA I")
                    (?U . "THAI CHARACTER THANTHAKHAT")))
  (define-category (car category) (cdr category) thai-category-table))
c595d888
KH
41
;; The general composing rules are as follows:
;;
;;                          T
;;       V        U         V                  U
;; CV -> C, CU -> C, CVT -> C, Cv -> C, CvU -> C
;;                                   v         v
;;
;; where C: consonant, V: vowel upper, v: vowel lower,
;;       T: tone mark, U: tone mark and upper sign.
;; Special rule: The sign `์' can be put on the vowel `ิ'.
c595d888 52
8f9eda28
KH
53
(defvar thai-composition-pattern
  "\\cc\\(\\cu\\|\\cI\\cU\\|\\cv\\ct?\\)\\|\\cv\\ct\\|\\cI\\cU"
  "Regular expression matching a Thai composite sequence.
The categories it refers to (`c', `u', `I', `U', `v', `t') are the
ones defined in `thai-category-table', so that table must be the
current category table while matching.  The alternatives are:
  - a `c' character followed by a `u' character, by an `I' and a `U'
    character, or by a `v' character and an optional `t' character;
  - a `v' character followed by a `t' character;
  - an `I' character followed by a `U' character.")
3de54645 57
ef90a979
KH
;; Register, for every character in the table below, its
;; `phonetic-type' and `name' char-code properties and the matching
;; categories in `thai-category-table'.  Each entry has the form
;; (CHAR PHONETIC-TYPE NAME); the trailing comment is the TIS-620 code.
(dolist (elm '((?ก consonant "LETTER KO KAI")                 ; 0xA1
               (?ข consonant "LETTER KHO KHAI")               ; 0xA2
               (?ฃ consonant "LETTER KHO KHUAT")              ; 0xA3
               (?ค consonant "LETTER KHO KHWAI")              ; 0xA4
               (?ฅ consonant "LETTER KHO KHON")               ; 0xA5
               (?ฆ consonant "LETTER KHO RAKHANG")            ; 0xA6
               (?ง consonant "LETTER NGO NGU")                ; 0xA7
               (?จ consonant "LETTER CHO CHAN")               ; 0xA8
               (?ฉ consonant "LETTER CHO CHING")              ; 0xA9
               (?ช consonant "LETTER CHO CHANG")              ; 0xAA
               (?ซ consonant "LETTER SO SO")                  ; 0xAB
               (?ฌ consonant "LETTER CHO CHOE")               ; 0xAC
               (?ญ consonant "LETTER YO YING")                ; 0xAD
               (?ฎ consonant "LETTER DO CHADA")               ; 0xAE
               (?ฏ consonant "LETTER TO PATAK")               ; 0xAF
               (?ฐ consonant "LETTER THO THAN")               ; 0xB0
               (?ฑ consonant "LETTER THO NANGMONTHO")         ; 0xB1
               (?ฒ consonant "LETTER THO PHUTHAO")            ; 0xB2
               (?ณ consonant "LETTER NO NEN")                 ; 0xB3
               (?ด consonant "LETTER DO DEK")                 ; 0xB4
               (?ต consonant "LETTER TO TAO")                 ; 0xB5
               (?ถ consonant "LETTER THO THUNG")              ; 0xB6
               (?ท consonant "LETTER THO THAHAN")             ; 0xB7
               (?ธ consonant "LETTER THO THONG")              ; 0xB8
               (?น consonant "LETTER NO NU")                  ; 0xB9
               (?บ consonant "LETTER BO BAIMAI")              ; 0xBA
               (?ป consonant "LETTER PO PLA")                 ; 0xBB
               (?ผ consonant "LETTER PHO PHUNG")              ; 0xBC
               (?ฝ consonant "LETTER FO FA")                  ; 0xBD
               (?พ consonant "LETTER PHO PHAN")               ; 0xBE
               (?ฟ consonant "LETTER FO FAN")                 ; 0xBF
               (?ภ consonant "LETTER PHO SAMPHAO")            ; 0xC0
               (?ม consonant "LETTER MO MA")                  ; 0xC1
               (?ย consonant "LETTER YO YAK")                 ; 0xC2
               (?ร consonant "LETTER RO RUA")                 ; 0xC3
               (?ฤ vowel-base "LETTER RU (Pali vowel letter)") ; 0xC4
               (?ล consonant "LETTER LO LING")                ; 0xC5
               (?ฦ vowel-base "LETTER LU (Pali vowel letter)") ; 0xC6
               (?ว consonant "LETTER WO WAEN")                ; 0xC7
               (?ศ consonant "LETTER SO SALA")                ; 0xC8
               (?ษ consonant "LETTER SO RUSI")                ; 0xC9
               (?ส consonant "LETTER SO SUA")                 ; 0xCA
               (?ห consonant "LETTER HO HIP")                 ; 0xCB
               (?ฬ consonant "LETTER LO CHULA")               ; 0xCC
               (?อ consonant "LETTER O ANG")                  ; 0xCD
               (?ฮ consonant "LETTER HO NOK HUK")             ; 0xCE
               (?ฯ special "PAI YAN NOI (abbreviation)")      ; 0xCF
               (?ะ vowel-base "VOWEL SIGN SARA A")            ; 0xD0
               (?ั vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T") ; 0xD1
               (?า vowel-base "VOWEL SIGN SARA AA")           ; 0xD2
               (?ำ vowel-base "VOWEL SIGN SARA AM")           ; 0xD3
               (?ิ vowel-upper "VOWEL SIGN SARA I N/S-T")     ; 0xD4
               (?ี vowel-upper "VOWEL SIGN SARA II N/S-T")    ; 0xD5
               (?ึ vowel-upper "VOWEL SIGN SARA UE N/S-T")    ; 0xD6
               (?ื vowel-upper "VOWEL SIGN SARA UEE N/S-T")   ; 0xD7
               (?ุ vowel-lower "VOWEL SIGN SARA U N/S-B")     ; 0xD8
               (?ู vowel-lower "VOWEL SIGN SARA UU N/S-B")    ; 0xD9
               (?ฺ vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)") ; 0xDA
               (?฻ invalid nil)                               ; 0xDB
               (?฼ invalid nil)                               ; 0xDC
               (?฽ invalid nil)                               ; 0xDD
               (?฾ invalid nil)                               ; 0xDE
               (?฿ special "BAHT SIGN (currency symbol)")     ; 0xDF
               (?เ vowel-base "VOWEL SIGN SARA E")            ; 0xE0
               (?แ vowel-base "VOWEL SIGN SARA AE")           ; 0xE1
               (?โ vowel-base "VOWEL SIGN SARA O")            ; 0xE2
               (?ใ vowel-base "VOWEL SIGN SARA MAI MUAN")     ; 0xE3
               (?ไ vowel-base "VOWEL SIGN SARA MAI MALAI")    ; 0xE4
               (?ๅ vowel-base "LAK KHANG YAO")                ; 0xE5
               (?ๆ special "MAI YAMOK (repetition)")          ; 0xE6
               (?็ sign-upper "VOWEL SIGN MAI TAI KHU N/S-T") ; 0xE7
               (?่ tone "TONE MAI EK N/S-T")                  ; 0xE8
               (?้ tone "TONE MAI THO N/S-T")                 ; 0xE9
               (?๊ tone "TONE MAI TRI N/S-T")                 ; 0xEA
               (?๋ tone "TONE MAI CHATTAWA N/S-T")            ; 0xEB
               (?์ sign-upper "THANTHAKHAT N/S-T (cancellation mark)") ; 0xEC
               (?ํ sign-upper "NIKKHAHIT N/S-T (final nasal)") ; 0xED
               (?๎ sign-upper "YAMAKKAN N/S-T")               ; 0xEE
               (?๏ special "FONGMAN")                         ; 0xEF
               (?๐ special "DIGIT ZERO")                      ; 0xF0
               (?๑ special "DIGIT ONE")                       ; 0xF1
               (?๒ special "DIGIT TWO")                       ; 0xF2
               (?๓ special "DIGIT THREE")                     ; 0xF3
               (?๔ special "DIGIT FOUR")                      ; 0xF4
               (?๕ special "DIGIT FIVE")                      ; 0xF5
               (?๖ special "DIGIT SIX")                       ; 0xF6
               (?๗ special "DIGIT SEVEN")                     ; 0xF7
               (?๘ special "DIGIT EIGHT")                     ; 0xF8
               (?๙ special "DIGIT NINE")                      ; 0xF9
               (?๚ special "ANGKHANKHU (ellipsis)")           ; 0xFA
               (?๛ special "KHOMUT (beginning of religious texts)") ; 0xFB
               (?๜ invalid nil)                               ; 0xFC
               (?๝ invalid nil)                               ; 0xFD
               (?๞ invalid nil)                               ; 0xFE
               ))
  (let ((char (car elm))
        (ptype (nth 1 elm)))
    (put-char-code-property char 'phonetic-type ptype)
    ;; Map the phonetic type onto the categories that
    ;; `thai-composition-pattern' matches against.
    (cond ((eq ptype 'consonant)
           (modify-category-entry char ?c thai-category-table))
          ((memq ptype '(vowel-upper vowel-lower))
           (modify-category-entry char ?v thai-category-table)
           (when (= char ?ิ)
             ;; Give category `I' to "SARA I".
             (modify-category-entry char ?I thai-category-table)))
          ((eq ptype 'tone)
           ;; A tone mark belongs to both `t' and `u'.
           (modify-category-entry char ?t thai-category-table)
           (modify-category-entry char ?u thai-category-table))
          ((eq ptype 'sign-upper)
           (modify-category-entry char ?u thai-category-table)
           (when (= char ?์)
             ;; Give category `U' to "THANTHAKHAT".
             (modify-category-entry char ?U thai-category-table))))
    (put-char-code-property char 'name (nth 2 elm))))
3de54645 175
8f9eda28
KH
(defun thai-compose-syllable (beg end &optional category-set string)
  "Compose the characters between BEG and END; return the length END - BEG.
If STRING is non-nil, BEG and END are indices into STRING and the
composition is made with `compose-string'; otherwise they are buffer
positions and `compose-region' is used.
CATEGORY-SET is the category set of the character at BEG; when it is
nil it is looked up with `char-category-set'.  If that set contains
category `c', the range is composed without explicit components.
Otherwise the first two characters are composed with the rule
\(Bc . Bc) between them."
  (let* ((categories
          (or category-set
              (char-category-set
               (if string (aref string beg) (char-after beg)))))
         (consonant-led (aref categories ?c)))
    (cond
     ;; Starting with a consonant.  We do relative composition.
     ((and string consonant-led)
      (compose-string string beg end))
     (consonant-led
      (compose-region beg end))
     ;; Vowel tone sequence.
     (string
      (compose-string string beg end
                      (list (aref string beg)
                            '(Bc . Bc)
                            (aref string (1+ beg)))))
     (t
      (compose-region beg end
                      (list (char-after beg)
                            '(Bc . Bc)
                            (char-after (1+ beg)))))))
  (- end beg))
192
4ed46869
KH
193;;;###autoload
(defun thai-compose-region (beg end)
  "Compose Thai characters in the region.
When called from a program, expects two arguments,
positions (integers or markers) specifying the region.
Point is restored afterwards, except that if it was strictly inside
a composed sequence it is left at the end of that sequence."
  (interactive "r")
  (let ((final-point (point)))
    (save-restriction
      (narrow-to-region beg end)
      (goto-char (point-min))
      ;; The pattern relies on the categories of `thai-category-table'.
      (with-category-table thai-category-table
        (while (re-search-forward thai-composition-pattern nil t)
          (let ((seq-beg (match-beginning 0))
                (seq-end (match-end 0)))
            ;; Do not leave point in the middle of a composition.
            (when (and (< seq-beg final-point) (< final-point seq-end))
              (setq final-point seq-end))
            (thai-compose-syllable seq-beg seq-end
                                   (char-category-set (char-after seq-beg)))))))
    (goto-char final-point)))
4ed46869 211
0a57cf51
KH
212;;;###autoload
(defun thai-compose-string (string)
  "Compose Thai characters in STRING and return the resulting string.
Every match of `thai-composition-pattern' in STRING is passed to
`thai-compose-syllable', and STRING itself is the return value."
  ;; The pattern relies on the categories of `thai-category-table'.
  (with-category-table thai-category-table
    (let ((idx 0)
          seq-end)
      (while (setq idx (string-match thai-composition-pattern string idx))
        ;; Read the match end once, before calling out, so the loop does
        ;; not depend on `thai-compose-syllable' preserving match data.
        (setq seq-end (match-end 0))
        (thai-compose-syllable idx seq-end nil string)
        (setq idx seq-end))))
  string)
a1506d29 221
4ed46869
KH
222;;;###autoload
(defun thai-compose-buffer ()
  "Compose Thai characters in the current buffer.
This runs `thai-compose-region' from `point-min' to `point-max'."
  (interactive)
  (let ((start (point-min))
        (finish (point-max)))
    (thai-compose-region start finish)))
227
228;;;###autoload
ef90a979
KH
(defun thai-composition-function (gstring)
  "Compose the glyph-string GSTRING for Thai.
A GSTRING holding a single character is handed to
`compose-gstring-for-graphic'.  Otherwise `font-shape-gstring' is
tried first and its non-nil result is returned.  Failing that, the
leading run of non-nil glyphs is composed with
`compose-glyph-string-relative'; when the last character of GSTRING
is SARA AM, the final glyph of that run is left out."
  (cond
   ((= (lgstring-char-len gstring) 1)
    (compose-gstring-for-graphic gstring))
   ((font-shape-gstring gstring))
   (t
    (let ((limit (lgstring-glyph-len gstring))
          (final-char (lgstring-char gstring
                                     (1- (lgstring-char-len gstring))))
          (count 0))
      ;; Count the glyphs up to the first empty slot.
      (while (and (< count limit)
                  (lgstring-glyph gstring count))
        (setq count (1+ count)))
      (when (= final-char ?ำ)
        (setq count (1- count)))
      (compose-glyph-string-relative gstring 0 count 0.1)))))
4ed46869 244
1085b551
KH
245;; Thai-word-mode requires functions in the feature `thai-word'.
246(require 'thai-word)
247
(defvar thai-word-mode-map
  (let ((keymap (make-sparse-keymap)))
    ;; Each pair is (STANDARD-COMMAND . THAI-REPLACEMENT).
    (dolist (pair '((forward-word . thai-forward-word)
                    (backward-word . thai-backward-word)
                    (kill-word . thai-kill-word)
                    (backward-kill-word . thai-backward-kill-word)
                    (transpose-words . thai-transpose-words)))
      (define-key keymap (vector 'remap (car pair)) (cdr pair)))
    keymap)
  "Keymap for `thai-word-mode'.")
257
(define-minor-mode thai-word-mode
  "Minor mode to make word-oriented commands aware of Thai words.
With a prefix argument ARG, enable the mode if ARG is positive,
and disable it otherwise. If called from Lisp, enable the mode
if ARG is omitted or nil. The commands affected are
\\[forward-word], \\[backward-word], \\[kill-word], \\[backward-kill-word],
\\[transpose-words], and \\[fill-paragraph]."
  :global t :group 'mule
  (let ((thai-char (make-char 'thai-tis620)))
    (if thai-word-mode
        (progn
          ;; This enables linebreak between Thai characters.
          (modify-category-entry thai-char ?|)
          ;; This enables linebreak at a Thai word boundary.
          (put-charset-property 'thai-tis620 'fill-find-break-point-function
                                'thai-fill-find-break-point))
      ;; Turning the mode off undoes both settings.
      (modify-category-entry thai-char ?| nil t)
      (put-charset-property 'thai-tis620 'fill-find-break-point-function
                            nil))))
276
277;; Function to call on entering the Thai language environment.
(defun setup-thai-language-environment-internal ()
  "Turn on `thai-word-mode'.
Function to call on entering the Thai language environment."
  (thai-word-mode 1))
280
281;; Function to call on exiting the Thai language environment.
(defun exit-thai-language-environment-internal ()
  "Turn off `thai-word-mode'.
Function to call on exiting the Thai language environment."
  (thai-word-mode -1))
284
4ed46869 285;;
650e8505 286(provide 'thai-util)
4ed46869 287
4ed46869 288;;; thai-util.el ends here