Switch to recommended form of GPLv3 permissions notice.
[bpt/emacs.git] / lisp / language / tibet-util.el
CommitLineData
348f797a 1;;; tibet-util.el --- utilities for Tibetan -*- coding: iso-2022-7bit; -*-
80d75b56 2
38141d20 3;; Copyright (C) 1997, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
d4877ac1 4;; Free Software Foundation, Inc.
7976eda0 5;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
38141d20 6;; 2005, 2006, 2007, 2008
eaa61218
KH
7;; National Institute of Advanced Industrial Science and Technology (AIST)
8;; Registration Number H14PRO021
80d75b56
KH
9
10;; Keywords: multilingual, Tibetan
11
12;; This file is part of GNU Emacs.
13
4936186e 14;; GNU Emacs is free software: you can redistribute it and/or modify
80d75b56 15;; it under the terms of the GNU General Public License as published by
4936186e
GM
16;; the Free Software Foundation, either version 3 of the License, or
17;; (at your option) any later version.
80d75b56
KH
18
19;; GNU Emacs is distributed in the hope that it will be useful,
20;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;; GNU General Public License for more details.
23
24;; You should have received a copy of the GNU General Public License
4936186e 25;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
80d75b56
KH
26
27;; Author: Toru TOMABECHI, <Toru.Tomabechi@orient.unil.ch>
28
29;; Created: Feb. 17. 1997
30
e8af40ee 31;;; History:
80d75b56
KH
32;; 1997.03.13 Modification in treatment of text properties;
33;; Support for some special signs and punctuations.
3bdf8898 34;; 1999.10.25 Modification for a new composition way by K.Handa.
80d75b56 35
e8af40ee
PJ
36;;; Commentary:
37
80d75b56
KH
38;;; Code:
39
93f7e657 40(defconst tibetan-obsolete-glyphs
8f924df7
KH
41 `(("\e$(7!=\e(B" . "\e$(7!=\e(B") ; 2 col <-> 1 col
42 ("\e$(7!?\e(B" . "\e$(7!?\e(B")
43 ("\e$(7!@\e(B" . "\e$(7!@\e(B")
44 ("\e$(7!A\e(B" . "\e$(7!A\e(B")
45 ("\e$(7"`\e(B" . "\e$(7"`\e(B")
46 ("\e$(7!;\e(B" . "\e$(7!;\e(B")
47 ("\e$(7!D\e(B" . "\e$(7!D\e(B")
93f7e657
RS
48 ;; Yes these are dirty. But ...
49 ("\e$(7!>\e(B \e$(7!>\e(B" . ,(compose-string "\e$(7!>\e(B \e$(7!>\e(B" 0 3 [?\e$(7!>\e(B (Br . Bl) ? (Br . Bl) ?\e$(7!>\e(B]))
50 ("\e$(7!4!5!5\e(B" . ,(compose-string
51 "\e$(7#R#S#S#S\e(B" 0 4
52 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B]))
53 ("\e$(7!4!5\e(B" . ,(compose-string "\e$(7#R#S#S\e(B" 0 3 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (Br . Bl) ?\e$(7#S\e(B]))
54 ("\e$(7!6\e(B" . ,(compose-string "\e$(7#R#S!I\e(B" 0 3 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B (br . tr) ?\e$(7!I\e(B]))
55 ("\e$(7!4\e(B" . ,(compose-string "\e$(7#R#S\e(B" 0 2 [?\e$(7#R\e(B (Br . Bl) ?\e$(7#S\e(B]))))
56
80d75b56 57;;;###autoload
3bdf8898
KH
58(defun tibetan-char-p (ch)
59 "Check if char CH is Tibetan character.
60Returns non-nil if CH is Tibetan. Otherwise, returns nil."
61 (memq (char-charset ch) '(tibetan tibetan-1-column)))
80d75b56 62
3bdf8898 63;;; Functions for Tibetan <-> Tibetan-transcription.
80d75b56
KH
64
65;;;###autoload
3bdf8898
KH
66(defun tibetan-tibetan-to-transcription (str)
67 "Transcribe Tibetan string STR and return the corresponding Roman string."
68 (let (;; Accumulate transcriptions here in reverse order.
69 (trans nil)
70 (len (length str))
71 (i 0)
72 ch this-trans)
73 (while (< i len)
ff69c4ee 74 (let ((idx (string-match tibetan-precomposition-rule-regexp str i)))
3bdf8898
KH
75 (if (eq idx i)
76 ;; Ith character and the followings matches precomposable
77 ;; Tibetan sequence.
78 (setq i (match-end 0)
79 this-trans
80 (car (rassoc
81 (cdr (assoc (match-string 0 str)
82 tibetan-precomposition-rule-alist))
83 tibetan-precomposed-transcription-alist)))
84 (setq ch (substring str i (1+ i))
85 i (1+ i)
86 this-trans
87 (car (or (rassoc ch tibetan-consonant-transcription-alist)
88 (rassoc ch tibetan-vowel-transcription-alist)
89 (rassoc ch tibetan-subjoined-transcription-alist)))))
90 (setq trans (cons this-trans trans))))
91 (apply 'concat (nreverse trans))))
80d75b56 92
3bdf8898
KH
93;;;###autoload
94(defun tibetan-transcription-to-tibetan (str)
95 "Convert Tibetan Roman string STR to Tibetan character string.
96The returned string has no composition information."
97 (let (;; Case is significant.
98 (case-fold-search nil)
99 (idx 0)
100 ;; Accumulate Tibetan strings here in reverse order.
101 (t-str-list nil)
102 i subtrans)
103 (while (setq i (string-match tibetan-regexp str idx))
104 (if (< idx i)
105 ;; STR contains a pattern that doesn't match Tibetan
106 ;; transcription. Include the pattern as is.
107 (setq t-str-list (cons (substring str idx i) t-str-list)))
108 (setq subtrans (match-string 0 str)
109 idx (match-end 0))
110 (let ((t-char (cdr (assoc subtrans
111 tibetan-precomposed-transcription-alist))))
112 (if t-char
113 ;; SUBTRANS corresponds to a transcription for
114 ;; precomposable Tibetan sequence.
115 (setq t-char (car (rassoc t-char
116 tibetan-precomposition-rule-alist)))
117 (setq t-char
118 (cdr
119 (or (assoc subtrans tibetan-consonant-transcription-alist)
120 (assoc subtrans tibetan-vowel-transcription-alist)
121 (assoc subtrans tibetan-modifier-transcription-alist)
122 (assoc subtrans tibetan-subjoined-transcription-alist)))))
123 (setq t-str-list (cons t-char t-str-list))))
124 (if (< idx (length str))
125 (setq t-str-list (cons (substring str idx) t-str-list)))
126 (apply 'concat (nreverse t-str-list))))
80d75b56
KH
127
128;;;
3bdf8898 129;;; Functions for composing/decomposing Tibetan sequence.
80d75b56
KH
130;;;
131;;; A Tibetan syllable is typically structured as follows:
132;;;
133;;; [Prefix] C [C+] V [M] [Suffix [Post suffix]]
134;;;
135;;; where C's are all vertically stacked, V appears below or above
136;;; consonant cluster and M is always put above the C[C+]V combination.
137;;; (Sanskrit visarga, though it is a vowel modifier, is considered
138;;; to be a punctuation.)
139;;;
ff69c4ee 140;;; Here are examples of the words "bsgrubs" and "hfauM"
80d75b56 141;;;
6b00e74e 142;;; \e$(7"7"G###C"U"7"G\e(B \e$(7"H"R"U"_\e(B
80d75b56
KH
143;;;
144;;; M
145;;; b s b s h
ff69c4ee 146;;; g fa
80d75b56
KH
147;;; r u
148;;; u
149;;;
3bdf8898
KH
150;;; Consonants `'' (\e$(7"A\e(B), `w' (\e$(7">\e(B), `y' (\e$(7"B\e(B), `r' (\e$(7"C\e(B) take special
151;;; forms when they are used as subjoined consonant. Consonant `r'
152;;; takes another special form when used as superjoined in such a case
153;;; as "rka", while it does not change its form when conjoined with
154;;; subjoined `'', `w' or `y' as in "rwa", "rya".
155
156;; Append a proper composition rule and glyph to COMPONENTS to compose
157;; CHAR with a composition that has COMPONENTS.
158
159(defun tibetan-add-components (components char)
160 (let ((last (last components))
161 (stack-upper '(tc . bc))
162 (stack-under '(bc . tc))
7258f9ca 163 rule comp-vowel tmp)
3bdf8898
KH
164 ;; Special treatment for 'a chung.
165 ;; If 'a follows a consonant, turn it into the subjoined form.
7258f9ca
KH
166 ;; * Disabled by Tomabechi 2000/06/09 *
167 ;; Because in Unicode, \e$(7"A\e(B may follow directly a consonant without
8f924df7 168 ;; any intervening vowel, as in \e$(7"9"""Q"A!;\e(B=\e$(7"9\e(B \e$(7""\e(B \e$(7"A\e(B not \e$(7"9\e(B \e$(7""\e(B \e$(7"Q\e(B \e$(7"A\e(B
7258f9ca
KH
169 ;;(if (and (= char ?\e$(7"A\e(B)
170 ;; (aref (char-category-set (car last)) ?0))
171 ;; (setq char ?\e$(7"R\e(B)) ;; modified for new font by Tomabechi 1999/12/10
172
173 ;; Composite vowel signs are decomposed before being added
174 ;; Added by Tomabechi 2000/06/08
175 (if (memq char '(?\e$(7"T\e(B ?\e$(7"V\e(B ?\e$(7"W\e(B ?\e$(7"X\e(B ?\e$(7"Y\e(B ?\e$(7"Z\e(B ?\e$(7"b\e(B))
176 (setq comp-vowel
ff69c4ee
KH
177 (copy-sequence
178 (cddr (assoc (char-to-string char)
179 tibetan-composite-vowel-alist)))
7258f9ca
KH
180 char
181 (cadr (assoc (char-to-string char)
182 tibetan-composite-vowel-alist))))
3bdf8898
KH
183 (cond
184 ;; Compose upper vowel sign vertically over.
185 ((aref (char-category-set char) ?2)
186 (setq rule stack-upper))
187
188 ;; Compose lower vowel sign vertically under.
189 ((aref (char-category-set char) ?3)
8f924df7
KH
190 (if (or (eq char ?\e$(7"Q\e(B) ;; `\e$(7"Q\e(B' and `\e$,1FP\e(B' should not visible when composed.
191 (eq char #xF70))
6b12c749
KH
192 (setq rule nil)
193 (setq rule stack-under)))
3bdf8898
KH
194 ;; Transform ra-mgo (superscribed r) if followed by a subjoined
195 ;; consonant other than w, ', y, r.
196 ((and (= (car last) ?\e$(7"C\e(B)
6b12c749
KH
197 (not (memq char '(?\e$(7#>\e(B ?\e$(7"R\e(B ?\e$(7#B\e(B ?\e$(7#C\e(B))))
198 (setcar last ?\e$(7!"\e(B) ;; modified for newfont by Tomabechi 1999/12/10
3bdf8898 199 (setq rule stack-under))
3bdf8898
KH
200 ;; Transform initial base consonant if followed by a subjoined
201 ;; consonant but 'a.
202 (t
203 (let ((laststr (char-to-string (car last))))
6b12c749
KH
204 (if (and (/= char ?\e$(7"R\e(B) ;; modified for new font by Tomabechi
205 (string-match "[\e$(7"!\e(B-\e$(7"="?"@"D\e(B-\e$(7"J"K\e(B]" laststr))
3bdf8898
KH
206 (setcar last (string-to-char
207 (cdr (assoc (char-to-string (car last))
208 tibetan-base-to-subjoined-alist)))))
209 (setq rule stack-under))))
210
6b12c749 211 (if rule
7258f9ca
KH
212 (setcdr last (list rule char)))
213 ;; Added by Tomabechi 2000/06/08
214 (if comp-vowel
215 (nconc last comp-vowel))
216 ))
80d75b56
KH
217
218;;;###autoload
219(defun tibetan-compose-string (str)
3bdf8898
KH
220 "Compose Tibetan string STR."
221 (let ((idx 0))
222 ;; `\e$(7"A\e(B' is included in the pattern for subjoined consonants
223 ;; because we treat it specially in tibetan-add-components.
7258f9ca 224 ;; (This feature is removed by Tomabechi 2000/06/08)
3bdf8898
KH
225 (while (setq idx (string-match tibetan-composable-pattern str idx))
226 (let ((from idx)
227 (to (match-end 0))
228 components)
229 (if (eq (string-match tibetan-precomposition-rule-regexp str idx) idx)
230 (setq idx (match-end 0)
231 components
232 (list (string-to-char
233 (cdr
234 (assoc (match-string 0 str)
235 tibetan-precomposition-rule-alist)))))
236 (setq components (list (aref str idx))
237 idx (1+ idx)))
238 (while (< idx to)
239 (tibetan-add-components components (aref str idx))
240 (setq idx (1+ idx)))
241 (compose-string str from to components))))
242 str)
80d75b56
KH
243
244;;;###autoload
3bdf8898
KH
245(defun tibetan-compose-region (beg end)
246 "Compose Tibetan text the region BEG and END."
80d75b56 247 (interactive "r")
3bdf8898 248 (let (str result chars)
80d75b56
KH
249 (save-excursion
250 (save-restriction
251 (narrow-to-region beg end)
252 (goto-char (point-min))
3bdf8898
KH
253 ;; `\e$(7"A\e(B' is included in the pattern for subjoined consonants
254 ;; because we treat it specially in tibetan-add-components.
7258f9ca 255 ;; (This feature is removed by Tomabechi 2000/06/08)
3bdf8898
KH
256 (while (re-search-forward tibetan-composable-pattern nil t)
257 (let ((from (match-beginning 0))
258 (to (match-end 0))
259 components)
260 (goto-char from)
261 (if (looking-at tibetan-precomposition-rule-regexp)
262 (progn
263 (setq components
264 (list (string-to-char
265 (cdr
266 (assoc (match-string 0)
267 tibetan-precomposition-rule-alist)))))
268 (goto-char (match-end 0)))
269 (setq components (list (char-after from)))
270 (forward-char 1))
271 (while (< (point) to)
272 (tibetan-add-components components (following-char))
273 (forward-char 1))
274 (compose-region from to components)))))))
80d75b56 275
6b12c749
KH
276(defvar tibetan-decompose-precomposition-alist
277 (mapcar (function (lambda (x) (cons (string-to-char (cdr x)) (car x))))
278 tibetan-precomposition-rule-alist))
279
80d75b56 280;;;###autoload
6b12c749
KH
281(defun tibetan-decompose-region (from to)
282 "Decompose Tibetan text in the region FROM and TO.
283This is different from decompose-region because precomposed Tibetan characters
80dadb9e 284are decomposed into normal Tibetan character sequences."
6b12c749
KH
285 (interactive "r")
286 (save-restriction
287 (narrow-to-region from to)
288 (decompose-region from to)
289 (goto-char from)
290 (while (not (eobp))
291 (let* ((char (following-char))
292 (slot (assq char tibetan-decompose-precomposition-alist)))
293 (if slot
294 (progn
295 (delete-char 1)
296 (insert (cdr slot)))
297 (forward-char 1))))))
298
299
3bdf8898 300;;;###autoload
6b12c749
KH
301(defun tibetan-decompose-string (str)
302 "Decompose Tibetan string STR.
303This is different from decompose-string because precomposed Tibetan characters
80dadb9e 304are decomposed into normal Tibetan character sequences."
6b12c749
KH
305 (let ((new "")
306 (len (length str))
307 (idx 0)
308 char slot)
309 (while (< idx len)
310 (setq char (aref str idx)
311 slot (assq (aref str idx) tibetan-decompose-precomposition-alist)
312 new (concat new (if slot (cdr slot) (char-to-string char)))
313 idx (1+ idx)))
314 new))
3bdf8898
KH
315
316;;;###autoload
7d840296
KH
317(defun tibetan-composition-function (from to font-object string)
318 (or (and font-object
319 (font-shape-text from to font-object string))
320 (if string
321 (tibetan-compose-string string)
322 (tibetan-compose-region from to))))
80d75b56
KH
323
324;;;
325;;; This variable is used to avoid repeated decomposition.
326;;;
327(setq-default tibetan-decomposed nil)
328
329;;;###autoload
330(defun tibetan-decompose-buffer ()
331 "Decomposes Tibetan characters in the buffer into their components.
3bdf8898 332See also the documentation of the function `tibetan-decompose-region'."
80d75b56
KH
333 (interactive)
334 (make-local-variable 'tibetan-decomposed)
335 (cond ((not tibetan-decomposed)
336 (tibetan-decompose-region (point-min) (point-max))
337 (setq tibetan-decomposed t))))
338
339;;;###autoload
340(defun tibetan-compose-buffer ()
341 "Composes Tibetan character components in the buffer.
342See also docstring of the function tibetan-compose-region."
343 (interactive)
344 (make-local-variable 'tibetan-decomposed)
345 (tibetan-compose-region (point-min) (point-max))
346 (setq tibetan-decomposed nil))
347
348;;;###autoload
349(defun tibetan-post-read-conversion (len)
350 (save-excursion
351 (save-restriction
352 (let ((buffer-modified-p (buffer-modified-p)))
353 (narrow-to-region (point) (+ (point) len))
354 (tibetan-compose-region (point-min) (point-max))
355 (set-buffer-modified-p buffer-modified-p)
73b4b264
KH
356 (make-local-variable 'tibetan-decomposed)
357 (setq tibetan-decomposed nil)
358 (- (point-max) (point-min))))))
80d75b56
KH
359
360
361;;;###autoload
362(defun tibetan-pre-write-conversion (from to)
363 (setq tibetan-decomposed-temp tibetan-decomposed)
7273226d 364 (let ((old-buf (current-buffer)))
76d12415
KH
365 (set-buffer (generate-new-buffer " *temp*"))
366 (if (stringp from)
367 (insert from)
368 (insert-buffer-substring old-buf from to))
369 (if (not tibetan-decomposed-temp)
370 (tibetan-decompose-region (point-min) (point-max)))
5d0ae729
KH
371 ;; Should return nil as annotations.
372 nil))
80d75b56 373
d633b538
KH
374\f
375;;;
376;;; Unicode-related definitions.
a1506d29 377;;;
d633b538
KH
378
379(defvar tibetan-canonicalize-for-unicode-alist
380 '(("\e$(7"Q\e(B" . "") ;; remove vowel a
381 ("\e$(7"T\e(B" . "\e$(7"R"S\e(B") ;; decompose vowels whose use is ``discouraged'' in Unicode 3.0
382 ("\e$(7"V\e(B" . "\e$(7"R"U\e(B")
383 ("\e$(7"W\e(B" . "\e$(7#C"a\e(B")
384 ("\e$(7"X\e(B" . "\e$(7#C"R"a\e(B")
385 ("\e$(7"Y\e(B" . "\e$(7#D"a\e(B")
386 ("\e$(7"Z\e(B" . "\e$(7#D"R"a\e(B")
387 ("\e$(7"b\e(B" . "\e$(7"R"a\e(B"))
388 "Rules for canonicalizing Tibetan vowels for Unicode.")
389
390(defvar tibetan-canonicalize-for-unicode-regexp
391 "[\e$(7"Q"T"V"W"X"Y"Z"b\e(B]"
392 "Regexp for Tibetan vowels to be canonicalized in Unicode.")
393
394(defun tibetan-canonicalize-for-unicode-region (from to)
395 (save-restriction
396 (narrow-to-region from to)
397 (goto-char from)
398 (while (re-search-forward tibetan-canonicalize-for-unicode-regexp nil t)
399 (let (
400 ;;(from (match-beginning 0))
401 ;;(to (match-end 0))
402 (canonical-form
403 (cdr (assoc (match-string 0)
404 tibetan-canonicalize-for-unicode-alist))))
405 ;;(goto-char from)
406 ;;(delete-region from to)
407 ;;(insert canonical-form)
408 (replace-match canonical-form)
409 ))))
410
411(defvar tibetan-strict-unicode t
412 "*Flag to control Tibetan canonicalizing for Unicode.
413
414If non-nil, the vowel a is removed and composite vowels are decomposed
415before writing buffer in Unicode. See also
416`tibetan-canonicalize-for-unicode-regexp' and
417`tibetan-canonicalize-for-unicode-alist'.")
418
419;;;###autoload
420(defun tibetan-pre-write-canonicalize-for-unicode (from to)
421 (let ((old-buf (current-buffer))
422 (strict-unicode tibetan-strict-unicode))
423 (set-buffer (generate-new-buffer " *temp*"))
424 (if (stringp from)
425 (insert from)
426 (insert-buffer-substring old-buf from to))
427 (if strict-unicode
428 (tibetan-canonicalize-for-unicode-region (point-min) (point-max)))
429 ;; Should return nil as annotations.
430 nil))
431
650e8505 432(provide 'tibet-util)
80d75b56 433
cbee283d 434;; arch-tag: 7a7333e8-1584-446c-b39c-a02b9def265d
e8af40ee 435;;; tibet-util.el ends here