-;;; thai-util.el --- utilities for Thai -*- coding: iso-2022-7bit; -*-
-
-;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007, 2008
-;; National Institute of Advanced Industrial Science and Technology (AIST)
-;; Registration Number H14PRO021
-;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-;; Free Software Foundation, Inc.
-
-;; Keywords: mule, multilingual, thai
-
-;; This file is part of GNU Emacs.
-
-;; GNU Emacs is free software; you can redistribute it and/or modify
-;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
-
-;; GNU Emacs is distributed in the hope that it will be useful,
-;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;; GNU General Public License for more details.
-
-;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
-
-;;; Commentary:
-
-;;; Code:
-
-(defvar thai-auto-composition-mode)
-
-;; Setting information of Thai characters.
-
-(defconst thai-category-table (make-category-table))
-(define-category ?c "Thai consonant" thai-category-table)
-(define-category ?v "Thai upper/lower vowel" thai-category-table)
-(define-category ?t "Thai tone mark" thai-category-table)
-(define-category ?u "Thai tone mark and upper sign" thai-category-table)
-(define-category ?I "THAI CHARACTER SARA I" thai-category-table)
-(define-category ?U "THAI CHARACTER THANTHAKHAT" thai-category-table)
-
-;; The general composing rules are as follows:
-;;
-;; T
-;; V U V U
-;; CV -> C, CU -> C, CVT -> C, Cv -> C, CvU -> C
-;; v v
-;;
-;; where C: consonant, V: vowel upper, v: vowel lower,
-;; T: tone mark, U: tone mark and upper sign.
-;; Special rule: The sign `\e,Tl\e(B' can be put on the vowel `\e,TT\e(B'.
-
-
-(defvar thai-composition-pattern
- "\\cc\\(\\cu\\|\\cI\\cU\\|\\cv\\ct?\\)\\|\\cv\\ct\\|\\cI\\cU"
- "Regular expression matching a Thai composite sequence.")
-
-(defun thai-self-insert-command (&optional n)
- "Insert the Thai character you type.
-The character will be composed with the surrounding Thai character
-if necessary."
- (interactive "*p")
- (let ((pos (point))
- category-set ch)
- (self-insert-command n)
- (or thai-auto-composition-mode
- (thai-auto-composition (1- (point)) (point) 0))))
-
-(let ((l '((?\e,T!\e(B consonant "LETTER KO KAI") ; 0xA1
- (?\e,T"\e(B consonant "LETTER KHO KHAI") ; 0xA2
- (?\e,T#\e(B consonant "LETTER KHO KHUAT") ; 0xA3
- (?\e,T$\e(B consonant "LETTER KHO KHWAI") ; 0xA4
- (?\e,T%\e(B consonant "LETTER KHO KHON") ; 0xA5
- (?\e,T&\e(B consonant "LETTER KHO RAKHANG") ; 0xA6
- (?\e,T'\e(B consonant "LETTER NGO NGU") ; 0xA7
- (?\e,T(\e(B consonant "LETTER CHO CHAN") ; 0xA8
- (?\e,T)\e(B consonant "LETTER CHO CHING") ; 0xA9
- (?\e,T*\e(B consonant "LETTER CHO CHANG") ; 0xAA
- (?\e,T+\e(B consonant "LETTER SO SO") ; 0xAB
- (?\e,T,\e(B consonant "LETTER CHO CHOE") ; 0xAC
- (?\e,T-\e(B consonant "LETTER YO YING") ; 0xAD
- (?\e,T.\e(B consonant "LETTER DO CHADA") ; 0xAE
- (?\e,T/\e(B consonant "LETTER TO PATAK") ; 0xAF
- (?\e,T0\e(B consonant "LETTER THO THAN") ; 0xB0
- (?\e,T1\e(B consonant "LETTER THO NANGMONTHO") ; 0xB1
- (?\e,T2\e(B consonant "LETTER THO PHUTHAO") ; 0xB2
- (?\e,T3\e(B consonant "LETTER NO NEN") ; 0xB3
- (?\e,T4\e(B consonant "LETTER DO DEK") ; 0xB4
- (?\e,T5\e(B consonant "LETTER TO TAO") ; 0xB5
- (?\e,T6\e(B consonant "LETTER THO THUNG") ; 0xB6
- (?\e,T7\e(B consonant "LETTER THO THAHAN") ; 0xB7
- (?\e,T8\e(B consonant "LETTER THO THONG") ; 0xB8
- (?\e,T9\e(B consonant "LETTER NO NU") ; 0xB9
- (?\e,T:\e(B consonant "LETTER BO BAIMAI") ; 0xBA
- (?\e,T;\e(B consonant "LETTER PO PLA") ; 0xBB
- (?\e,T<\e(B consonant "LETTER PHO PHUNG") ; 0xBC
- (?\e,T=\e(B consonant "LETTER FO FA") ; 0xBD
- (?\e,T>\e(B consonant "LETTER PHO PHAN") ; 0xBE
- (?\e,T?\e(B consonant "LETTER FO FAN") ; 0xBF
- (?\e,T@\e(B consonant "LETTER PHO SAMPHAO") ; 0xC0
- (?\e,TA\e(B consonant "LETTER MO MA") ; 0xC1
- (?\e,TB\e(B consonant "LETTER YO YAK") ; 0xC2
- (?\e,TC\e(B consonant "LETTER RO RUA") ; 0xC3
- (?\e,TD\e(B vowel-base "LETTER RU (Pali vowel letter)") ; 0xC4
- (?\e,TE\e(B consonant "LETTER LO LING") ; 0xC5
- (?\e,TF\e(B vowel-base "LETTER LU (Pali vowel letter)") ; 0xC6
- (?\e,TG\e(B consonant "LETTER WO WAEN") ; 0xC7
- (?\e,TH\e(B consonant "LETTER SO SALA") ; 0xC8
- (?\e,TI\e(B consonant "LETTER SO RUSI") ; 0xC9
- (?\e,TJ\e(B consonant "LETTER SO SUA") ; 0xCA
- (?\e,TK\e(B consonant "LETTER HO HIP") ; 0xCB
- (?\e,TL\e(B consonant "LETTER LO CHULA") ; 0xCC
- (?\e,TM\e(B consonant "LETTER O ANG") ; 0xCD
- (?\e,TN\e(B consonant "LETTER HO NOK HUK") ; 0xCE
- (?\e,TO\e(B special "PAI YAN NOI (abbreviation)") ; 0xCF
- (?\e,TP\e(B vowel-base "VOWEL SIGN SARA A") ; 0xD0
- (?\e,TQ\e(B vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T") ; 0xD1
- (?\e,TR\e(B vowel-base "VOWEL SIGN SARA AA") ; 0xD2
- (?\e,TS\e(B vowel-base "VOWEL SIGN SARA AM") ; 0xD3
- (?\e,TT\e(B vowel-upper "VOWEL SIGN SARA I N/S-T") ; 0xD4
- (?\e,TU\e(B vowel-upper "VOWEL SIGN SARA II N/S-T") ; 0xD5
- (?\e,TV\e(B vowel-upper "VOWEL SIGN SARA UE N/S-T") ; 0xD6
- (?\e,TW\e(B vowel-upper "VOWEL SIGN SARA UEE N/S-T") ; 0xD7
- (?\e,TX\e(B vowel-lower "VOWEL SIGN SARA U N/S-B") ; 0xD8
- (?\e,TY\e(B vowel-lower "VOWEL SIGN SARA UU N/S-B") ; 0xD9
- (?\e,TZ\e(B vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)") ; 0xDA
- (?\e,T[\e(B invalid nil) ; 0xDA
- (?\e,T\\e(B invalid nil) ; 0xDC
- (?\e,T]\e(B invalid nil) ; 0xDC
- (?\e,T^\e(B invalid nil) ; 0xDC
- (?\e,T_\e(B special "BAHT SIGN (currency symbol)") ; 0xDF
- (?\e,T`\e(B vowel-base "VOWEL SIGN SARA E") ; 0xE0
- (?\e,Ta\e(B vowel-base "VOWEL SIGN SARA AE") ; 0xE1
- (?\e,Tb\e(B vowel-base "VOWEL SIGN SARA O") ; 0xE2
- (?\e,Tc\e(B vowel-base "VOWEL SIGN SARA MAI MUAN") ; 0xE3
- (?\e,Td\e(B vowel-base "VOWEL SIGN SARA MAI MALAI") ; 0xE4
- (?\e,Te\e(B vowel-base "LAK KHANG YAO") ; 0xE5
- (?\e,Tf\e(B special "MAI YAMOK (repetion)") ; 0xE6
- (?\e,Tg\e(B sign-upper "VOWEL SIGN MAI TAI KHU N/S-T") ; 0xE7
- (?\e,Th\e(B tone "TONE MAI EK N/S-T") ; 0xE8
- (?\e,Ti\e(B tone "TONE MAI THO N/S-T") ; 0xE9
- (?\e,Tj\e(B tone "TONE MAI TRI N/S-T") ; 0xEA
- (?\e,Tk\e(B tone "TONE MAI CHATTAWA N/S-T") ; 0xEB
- (?\e,Tl\e(B sign-upper "THANTHAKHAT N/S-T (cancellation mark)") ; 0xEC
- (?\e,Tm\e(B sign-upper "NIKKHAHIT N/S-T (final nasal)") ; 0xED
- (?\e,Tn\e(B sign-upper "YAMAKKAN N/S-T") ; 0xEE
- (?\e,To\e(B special "FONRMAN") ; 0xEF
- (?\e,Tp\e(B special "DIGIT ZERO") ; 0xF0
- (?\e,Tq\e(B special "DIGIT ONE") ; 0xF1
- (?\e,Tr\e(B special "DIGIT TWO") ; 0xF2
- (?\e,Ts\e(B special "DIGIT THREE") ; 0xF3
- (?\e,Tt\e(B special "DIGIT FOUR") ; 0xF4
- (?\e,Tu\e(B special "DIGIT FIVE") ; 0xF5
- (?\e,Tv\e(B special "DIGIT SIX") ; 0xF6
- (?\e,Tw\e(B special "DIGIT SEVEN") ; 0xF7
- (?\e,Tx\e(B special "DIGIT EIGHT") ; 0xF8
- (?\e,Ty\e(B special "DIGIT NINE") ; 0xF9
- (?\e,Tz\e(B special "ANGKHANKHU (ellipsis)") ; 0xFA
- (?\e,T{\e(B special "KHOMUT (beginning of religious texts)") ; 0xFB
- (?\e,T|\e(B invalid nil) ; 0xFC
- (?\e,T}\e(B invalid nil) ; 0xFD
- (?\e,T~\e(B invalid nil) ; 0xFE
-
- ;; Unicode equivalents
- (?\e$,1Ba\e(B consonant "LETTER KO KAI")
- (?\e$,1Bb\e(B consonant "LETTER KHO KHAI")
- (?\e$,1Bc\e(B consonant "LETTER KHO KHUAT")
- (?\e$,1Bd\e(B consonant "LETTER KHO KHWAI")
- (?\e$,1Be\e(B consonant "LETTER KHO KHON")
- (?\e$,1Bf\e(B consonant "LETTER KHO RAKHANG")
- (?\e$,1Bg\e(B consonant "LETTER NGO NGU")
- (?\e$,1Bh\e(B consonant "LETTER CHO CHAN")
- (?\e$,1Bi\e(B consonant "LETTER CHO CHING")
- (?\e$,1Bj\e(B consonant "LETTER CHO CHANG")
- (?\e$,1Bk\e(B consonant "LETTER SO SO")
- (?\e$,1Bl\e(B consonant "LETTER CHO CHOE")
- (?\e$,1Bm\e(B consonant "LETTER YO YING")
- (?\e$,1Bn\e(B consonant "LETTER DO CHADA")
- (?\e$,1Bo\e(B consonant "LETTER TO PATAK")
- (?\e$,1Bp\e(B consonant "LETTER THO THAN")
- (?\e$,1Bq\e(B consonant "LETTER THO NANGMONTHO")
- (?\e$,1Br\e(B consonant "LETTER THO PHUTHAO")
- (?\e$,1Bs\e(B consonant "LETTER NO NEN")
- (?\e$,1Bt\e(B consonant "LETTER DO DEK")
- (?\e$,1Bu\e(B consonant "LETTER TO TAO")
- (?\e$,1Bv\e(B consonant "LETTER THO THUNG")
- (?\e$,1Bw\e(B consonant "LETTER THO THAHAN")
- (?\e$,1Bx\e(B consonant "LETTER THO THONG")
- (?\e$,1By\e(B consonant "LETTER NO NU")
- (?\e$,1Bz\e(B consonant "LETTER BO BAIMAI")
- (?\e$,1B{\e(B consonant "LETTER PO PLA")
- (?\e$,1B|\e(B consonant "LETTER PHO PHUNG")
- (?\e$,1B}\e(B consonant "LETTER FO FA")
- (?\e$,1B~\e(B consonant "LETTER PHO PHAN")
- (?\e$,1B\7f\e(B consonant "LETTER FO FAN")
- (?\e$,1C \e(B consonant "LETTER PHO SAMPHAO")
- (?\e$,1C!\e(B consonant "LETTER MO MA")
- (?\e$,1C"\e(B consonant "LETTER YO YAK")
- (?\e$,1C#\e(B consonant "LETTER RO RUA")
- (?\e$,1C$\e(B vowel-base "LETTER RU (Pali vowel letter)")
- (?\e$,1C%\e(B consonant "LETTER LO LING")
- (?\e$,1C&\e(B vowel-base "LETTER LU (Pali vowel letter)")
- (?\e$,1C'\e(B consonant "LETTER WO WAEN")
- (?\e$,1C(\e(B consonant "LETTER SO SALA")
- (?\e$,1C)\e(B consonant "LETTER SO RUSI")
- (?\e$,1C*\e(B consonant "LETTER SO SUA")
- (?\e$,1C+\e(B consonant "LETTER HO HIP")
- (?\e$,1C,\e(B consonant "LETTER LO CHULA")
- (?\e$,1C-\e(B consonant "LETTER O ANG")
- (?\e$,1C.\e(B consonant "LETTER HO NOK HUK")
- (?\e$,1C/\e(B special "PAI YAN NOI (abbreviation)")
- (?\e$,1C0\e(B vowel-base "VOWEL SIGN SARA A")
- (?\e$,1C1\e(B vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T")
- (?\e$,1C2\e(B vowel-base "VOWEL SIGN SARA AA")
- (?\e$,1C3\e(B vowel-base "VOWEL SIGN SARA AM")
- (?\e$,1C4\e(B vowel-upper "VOWEL SIGN SARA I N/S-T")
- (?\e$,1C5\e(B vowel-upper "VOWEL SIGN SARA II N/S-T")
- (?\e$,1C6\e(B vowel-upper "VOWEL SIGN SARA UE N/S-T")
- (?\e$,1C7\e(B vowel-upper "VOWEL SIGN SARA UEE N/S-T")
- (?\e$,1C8\e(B vowel-lower "VOWEL SIGN SARA U N/S-B")
- (?\e$,1C9\e(B vowel-lower "VOWEL SIGN SARA UU N/S-B")
- (?\e$,1C:\e(B vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)")
- (?\e$,1C?\e(B special "BAHT SIGN (currency symbol)")
- (?\e$,1C@\e(B vowel-base "VOWEL SIGN SARA E")
- (?\e$,1CA\e(B vowel-base "VOWEL SIGN SARA AE")
- (?\e$,1CB\e(B vowel-base "VOWEL SIGN SARA O")
- (?\e$,1CC\e(B vowel-base "VOWEL SIGN SARA MAI MUAN")
- (?\e$,1CD\e(B vowel-base "VOWEL SIGN SARA MAI MALAI")
- (?\e$,1CE\e(B vowel-base "LAK KHANG YAO")
- (?\e$,1CF\e(B special "MAI YAMOK (repetion)")
- (?\e$,1CG\e(B sign-upper "VOWEL SIGN MAI TAI KHU N/S-T")
- (?\e$,1CH\e(B tone "TONE MAI EK N/S-T")
- (?\e$,1CI\e(B tone "TONE MAI THO N/S-T")
- (?\e$,1CJ\e(B tone "TONE MAI TRI N/S-T")
- (?\e$,1CK\e(B tone "TONE MAI CHATTAWA N/S-T")
- (?\e$,1CL\e(B sign-upper "THANTHAKHAT N/S-T (cancellation mark)")
- (?\e$,1CM\e(B sign-upper "NIKKHAHIT N/S-T (final nasal)")
- (?\e$,1CN\e(B sign-upper "YAMAKKAN N/S-T")
- (?\e$,1CO\e(B special "FONRMAN")
- (?\e$,1CP\e(B special "DIGIT ZERO")
- (?\e$,1CQ\e(B special "DIGIT ONE")
- (?\e$,1CR\e(B special "DIGIT TWO")
- (?\e$,1CS\e(B special "DIGIT THREE")
- (?\e$,1CT\e(B special "DIGIT FOUR")
- (?\e$,1CU\e(B special "DIGIT FIVE")
- (?\e$,1CV\e(B special "DIGIT SIX")
- (?\e$,1CW\e(B special "DIGIT SEVEN")
- (?\e$,1CX\e(B special "DIGIT EIGHT")
- (?\e$,1CY\e(B special "DIGIT NINE")
- (?\e$,1CZ\e(B special "ANGKHANKHU (ellipsis)")
- (?\e$,1C[\e(B special "KHOMUT (beginning of religious texts)")
- ))
- elm)
- (while l
- (setq elm (car l) l (cdr l))
- (let ((char (car elm))
- (ptype (nth 1 elm)))
- (put-char-code-property char 'phonetic-type ptype)
- (cond ((eq ptype 'consonant)
- (modify-category-entry char ?c thai-category-table)
- (global-set-key (vector char) 'thai-self-insert-command))
- ((memq ptype '(vowel-upper vowel-lower))
- (modify-category-entry char ?v thai-category-table)
- (if (or (= char ?\e,TT\e(B) (= char ?\e$,1C4\e(B))
- ;; Give category `I' to "SARA I".
- (modify-category-entry char ?I thai-category-table))
- (global-set-key (vector char) 'thai-self-insert-command))
- ((eq ptype 'tone)
- (modify-category-entry char ?t thai-category-table)
- (modify-category-entry char ?u thai-category-table)
- (global-set-key (vector char) 'thai-self-insert-command))
- ((eq ptype 'sign-upper)
- (modify-category-entry char ?u thai-category-table)
- (if (or (= char ?\e,Tl\e(B) (= char ?\e$,1CL\e(B))
- ;; Give category `U' to "THANTHAKHAT".
- (modify-category-entry char ?U thai-category-table))
- (global-set-key (vector char) 'thai-self-insert-command)))
- (put-char-code-property char 'name (nth 2 elm)))))
-
-(defun thai-compose-syllable (beg end &optional category-set string)
- (or category-set
- (setq category-set
- (char-category-set (if string (aref string beg) (char-after beg)))))
- (if (aref category-set ?c)
- ;; Starting with a consonant. We do relative composition.
- (if string
- (compose-string string beg end)
- (compose-region beg end))
- ;; Vowel tone sequence.
- (if string
- (compose-string string beg end (list (aref string beg) '(Bc . Bc)
- (aref string (1+ beg))))
- (compose-region beg end (list (char-after beg) '(Bc . Bc)
- (char-after (1+ beg))))))
- (- end beg))
-
-;;;###autoload
-(defun thai-compose-region (beg end)
- "Compose Thai characters in the region.
-When called from a program, expects two arguments,
-positions (integers or markers) specifying the region."
- (interactive "r")
- (let ((pos (point)))
- (save-restriction
- (narrow-to-region beg end)
- (goto-char (point-min))
- (with-category-table thai-category-table
- (while (re-search-forward thai-composition-pattern nil t)
- (setq beg (match-beginning 0) end (match-end 0))
- (if (and (> pos beg) (< pos end))
- (setq pos end))
- (thai-compose-syllable beg end
- (char-category-set (char-after beg))))))
- (goto-char pos)))
-
-;;;###autoload
-(defun thai-compose-string (string)
- "Compose Thai characters in STRING and return the resulting string."
- (with-category-table thai-category-table
- (let ((idx 0))
- (while (setq idx (string-match thai-composition-pattern string idx))
- (thai-compose-syllable idx (match-end 0) nil string)
- (setq idx (match-end 0)))))
- string)
-
-;;;###autoload
-(defun thai-compose-buffer ()
- "Compose Thai characters in the current buffer."
- (interactive)
- (thai-compose-region (point-min) (point-max)))
-
-;;;###autoload
-(defun thai-post-read-conversion (len)
- (thai-compose-region (point) (+ (point) len))
- len)
-
-;;;###autoload
-(defun thai-composition-function (from to pattern &optional string)
- "Compose Thai text in the region FROM and TO.
-The text matches the regular expression PATTERN.
-Optional 4th argument STRING, if non-nil, is a string containing text
-to compose.
-
-The return value is number of composed characters."
- (when (and (not thai-auto-composition-mode)
- (< (1+ from) to))
- (with-category-table thai-category-table
- (if string
- (if (eq (string-match thai-composition-pattern string from) from)
- (thai-compose-syllable from (match-end 0) nil string))
- (if (save-excursion
- (goto-char from)
- (and (looking-at thai-composition-pattern)
- (setq to (match-end 0))))
- (thai-compose-syllable from to))))))
-
-(defun thai-auto-composition (beg end len)
- (with-category-table thai-category-table
- (let (category-set)
- (while (and (> beg (point-min))
- (setq category-set (char-category-set (char-after (1- beg))))
- (or (aref category-set ?v) (aref category-set ?u)))
- (setq beg (1- beg)))
- (if (and (> beg (point-min))
- (aref (char-category-set (char-after (1- beg))) ?c))
- (setq beg (1- beg)))
- (while (and (< end (point-max))
- (setq category-set (char-category-set (char-after end)))
- (or (aref category-set ?v) (aref category-set ?u)))
- (setq end (1+ end)))
- (if (< beg end)
- (thai-compose-region beg end)))))
-
-(put 'thai-auto-composition-mode 'permanent-local t)
-
-;;;###autoload
-(define-minor-mode thai-auto-composition-mode
- "Minor mode for automatically correct Thai character composition."
- :group 'mule
- (cond ((null thai-auto-composition-mode)
- (remove-hook 'after-change-functions 'thai-auto-composition))
- (t
- (add-hook 'after-change-functions 'thai-auto-composition))))
-
-;; Thai-word-mode requires functions in the feature `thai-word'.
-(require 'thai-word)
-
-(defvar thai-word-mode-map
- (let ((map (make-sparse-keymap)))
- (define-key map [remap forward-word] 'thai-forward-word)
- (define-key map [remap backward-word] 'thai-backward-word)
- (define-key map [remap kill-word] 'thai-kill-word)
- (define-key map [remap backward-kill-word] 'thai-backward-kill-word)
- (define-key map [remap transpose-words] 'thai-transpose-words)
- map)
- "Keymap for `thai-word-mode'.")
-
-(define-minor-mode thai-word-mode
- "Minor mode to make word-oriented commands aware of Thai words.
-The commands affected are \\[forward-word], \\[backward-word], \\[kill-word], \\[backward-kill-word], \\[transpose-words], and \\[fill-paragraph]."
- :global t :group 'mule
- (cond (thai-word-mode
- ;; This enables linebreak between Thai characters.
- (modify-category-entry (make-char 'thai-tis620) ?|)
- ;; This enables linebreak at a Thai word boundary.
- (put-charset-property 'thai-tis620 'fill-find-break-point-function
- 'thai-fill-find-break-point))
- (t
- (modify-category-entry (make-char 'thai-tis620) ?| nil t)
- (put-charset-property 'thai-tis620 'fill-find-break-point-function
- nil))))
-
-;; Function to call on entering the Thai language environment.
-(defun setup-thai-language-environment-internal ()
- (thai-word-mode 1))
-
-;; Function to call on exiting the Thai language environment.
-(defun exit-thai-language-environment-internal ()
- (thai-word-mode -1))
-
-;;
-(provide 'thai-util)
-
-;;; arch-tag: 59425d6a-8cf9-4e06-a6ab-8ab7dc7a7a97
-;;; thai-util.el ends here
+;;; thai-util.el --- utilities for Thai -*- coding: utf-8; -*-
+
+;; Copyright (C) 2000-2012 Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+;; 2005, 2006, 2007, 2008, 2009, 2010, 2011
+;; National Institute of Advanced Industrial Science and Technology (AIST)
+;; Registration Number H14PRO021
+
+;; Keywords: mule, multilingual, Thai, i18n
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+
+;; Declared without a value, which only tells the byte-compiler that the
+;; variable exists.
+;; NOTE(review): nothing in the visible part of this file refers to this
+;; variable any more -- confirm whether the declaration is still needed.
+(defvar thai-auto-composition-mode)
+
+;; Setting information of Thai characters.
+
+;; Category table used for Thai composition.  The mnemonics defined here
+;; are the ones named by the \cX classes of `thai-composition-pattern'.
+(defconst thai-category-table (make-category-table))
+(dolist (spec '((?c . "Thai consonant")
+                (?v . "Thai upper/lower vowel")
+                (?t . "Thai tone mark")
+                (?u . "Thai tone mark and upper sign")
+                (?I . "THAI CHARACTER SARA I")
+                (?U . "THAI CHARACTER THANTHAKHAT")))
+  (define-category (car spec) (cdr spec) thai-category-table))
+
+;; The general composing rules are as follows:
+;;
+;;                          T
+;;       V        U         V                  U
+;; CV -> C, CU -> C, CVT -> C, Cv -> C, CvU -> C
+;;                                   v         v
+;;
+;; where C: consonant, V: vowel upper, v: vowel lower,
+;; T: tone mark, U: tone mark and upper sign.
+;; Special rule: The sign `์' can be put on the vowel `ิ'.
+
+
+;; The \cX classes refer to categories, so the pattern is meant to be
+;; matched while `thai-category-table' is in effect.  Its alternatives are:
+;;   \cc followed by \cu, or by \cI\cU, or by \cv and an optional \ct;
+;;   \cv followed by \ct;
+;;   \cI followed by \cU.
+(defvar thai-composition-pattern
+ "\\cc\\(\\cu\\|\\cI\\cU\\|\\cv\\ct?\\)\\|\\cv\\ct\\|\\cI\\cU"
+ "Regular expression matching a Thai composite sequence.")
+
+;; Register every Thai character: record its `phonetic-type' and `name'
+;; char-code properties and, for the types that take part in composition,
+;; enter it in `thai-category-table'.  Each element of the list is
+;; (CHAR PHONETIC-TYPE NAME); the trailing comment is the code the
+;; character has in the 8-bit Thai charset (presumably TIS-620).
+;; The entries of type `invalid' are unassigned code points.  They are
+;; written as explicit \x escapes because a literal unassigned character
+;; is invisible and easily dropped, which would leave `? ' (the space
+;; character) in its place.
+(let ((l '((?ก consonant "LETTER KO KAI")				; 0xA1
+           (?ข consonant "LETTER KHO KHAI")				; 0xA2
+           (?ฃ consonant "LETTER KHO KHUAT")				; 0xA3
+           (?ค consonant "LETTER KHO KHWAI")				; 0xA4
+           (?ฅ consonant "LETTER KHO KHON")				; 0xA5
+           (?ฆ consonant "LETTER KHO RAKHANG")			; 0xA6
+           (?ง consonant "LETTER NGO NGU")				; 0xA7
+           (?จ consonant "LETTER CHO CHAN")				; 0xA8
+           (?ฉ consonant "LETTER CHO CHING")				; 0xA9
+           (?ช consonant "LETTER CHO CHANG")				; 0xAA
+           (?ซ consonant "LETTER SO SO")				; 0xAB
+           (?ฌ consonant "LETTER CHO CHOE")				; 0xAC
+           (?ญ consonant "LETTER YO YING")				; 0xAD
+           (?ฎ consonant "LETTER DO CHADA")				; 0xAE
+           (?ฏ consonant "LETTER TO PATAK")				; 0xAF
+           (?ฐ consonant "LETTER THO THAN")				; 0xB0
+           (?ฑ consonant "LETTER THO NANGMONTHO")			; 0xB1
+           (?ฒ consonant "LETTER THO PHUTHAO")			; 0xB2
+           (?ณ consonant "LETTER NO NEN")				; 0xB3
+           (?ด consonant "LETTER DO DEK")				; 0xB4
+           (?ต consonant "LETTER TO TAO")				; 0xB5
+           (?ถ consonant "LETTER THO THUNG")				; 0xB6
+           (?ท consonant "LETTER THO THAHAN")				; 0xB7
+           (?ธ consonant "LETTER THO THONG")				; 0xB8
+           (?น consonant "LETTER NO NU")				; 0xB9
+           (?บ consonant "LETTER BO BAIMAI")				; 0xBA
+           (?ป consonant "LETTER PO PLA")				; 0xBB
+           (?ผ consonant "LETTER PHO PHUNG")				; 0xBC
+           (?ฝ consonant "LETTER FO FA")				; 0xBD
+           (?พ consonant "LETTER PHO PHAN")				; 0xBE
+           (?ฟ consonant "LETTER FO FAN")				; 0xBF
+           (?ภ consonant "LETTER PHO SAMPHAO")			; 0xC0
+           (?ม consonant "LETTER MO MA")				; 0xC1
+           (?ย consonant "LETTER YO YAK")				; 0xC2
+           (?ร consonant "LETTER RO RUA")				; 0xC3
+           (?ฤ vowel-base "LETTER RU (Pali vowel letter)")		; 0xC4
+           (?ล consonant "LETTER LO LING")				; 0xC5
+           (?ฦ vowel-base "LETTER LU (Pali vowel letter)")		; 0xC6
+           (?ว consonant "LETTER WO WAEN")				; 0xC7
+           (?ศ consonant "LETTER SO SALA")				; 0xC8
+           (?ษ consonant "LETTER SO RUSI")				; 0xC9
+           (?ส consonant "LETTER SO SUA")				; 0xCA
+           (?ห consonant "LETTER HO HIP")				; 0xCB
+           (?ฬ consonant "LETTER LO CHULA")				; 0xCC
+           (?อ consonant "LETTER O ANG")				; 0xCD
+           (?ฮ consonant "LETTER HO NOK HUK")				; 0xCE
+           (?ฯ special "PAI YAN NOI (abbreviation)")			; 0xCF
+           (?ะ vowel-base "VOWEL SIGN SARA A")			; 0xD0
+           (?ั vowel-upper "VOWEL SIGN MAI HAN-AKAT N/S-T")		; 0xD1
+           (?า vowel-base "VOWEL SIGN SARA AA")			; 0xD2
+           (?ำ vowel-base "VOWEL SIGN SARA AM")			; 0xD3
+           (?ิ vowel-upper "VOWEL SIGN SARA I N/S-T")			; 0xD4
+           (?ี vowel-upper "VOWEL SIGN SARA II N/S-T")		; 0xD5
+           (?ึ vowel-upper "VOWEL SIGN SARA UE N/S-T")		; 0xD6
+           (?ื vowel-upper "VOWEL SIGN SARA UEE N/S-T")		; 0xD7
+           (?ุ vowel-lower "VOWEL SIGN SARA U N/S-B")			; 0xD8
+           (?ู vowel-lower "VOWEL SIGN SARA UU N/S-B")		; 0xD9
+           (?ฺ vowel-lower "VOWEL SIGN PHINTHU N/S-B (Pali virama)")	; 0xDA
+           (?\x0E3B invalid nil)					; 0xDB
+           (?\x0E3C invalid nil)					; 0xDC
+           (?\x0E3D invalid nil)					; 0xDD
+           (?\x0E3E invalid nil)					; 0xDE
+           (?฿ special "BAHT SIGN (currency symbol)")			; 0xDF
+           (?เ vowel-base "VOWEL SIGN SARA E")			; 0xE0
+           (?แ vowel-base "VOWEL SIGN SARA AE")			; 0xE1
+           (?โ vowel-base "VOWEL SIGN SARA O")			; 0xE2
+           (?ใ vowel-base "VOWEL SIGN SARA MAI MUAN")			; 0xE3
+           (?ไ vowel-base "VOWEL SIGN SARA MAI MALAI")		; 0xE4
+           (?ๅ vowel-base "LAK KHANG YAO")				; 0xE5
+           (?ๆ special "MAI YAMOK (repetition)")			; 0xE6
+           (?็ sign-upper "VOWEL SIGN MAI TAI KHU N/S-T")		; 0xE7
+           (?่ tone "TONE MAI EK N/S-T")				; 0xE8
+           (?้ tone "TONE MAI THO N/S-T")				; 0xE9
+           (?๊ tone "TONE MAI TRI N/S-T")				; 0xEA
+           (?๋ tone "TONE MAI CHATTAWA N/S-T")			; 0xEB
+           (?์ sign-upper "THANTHAKHAT N/S-T (cancellation mark)")	; 0xEC
+           (?ํ sign-upper "NIKKHAHIT N/S-T (final nasal)")		; 0xED
+           (?๎ sign-upper "YAMAKKAN N/S-T")				; 0xEE
+           (?๏ special "FONGMAN")					; 0xEF
+           (?๐ special "DIGIT ZERO")					; 0xF0
+           (?๑ special "DIGIT ONE")					; 0xF1
+           (?๒ special "DIGIT TWO")					; 0xF2
+           (?๓ special "DIGIT THREE")					; 0xF3
+           (?๔ special "DIGIT FOUR")					; 0xF4
+           (?๕ special "DIGIT FIVE")					; 0xF5
+           (?๖ special "DIGIT SIX")					; 0xF6
+           (?๗ special "DIGIT SEVEN")					; 0xF7
+           (?๘ special "DIGIT EIGHT")					; 0xF8
+           (?๙ special "DIGIT NINE")					; 0xF9
+           (?๚ special "ANGKHANKHU (ellipsis)")			; 0xFA
+           (?๛ special "KHOMUT (beginning of religious texts)")	; 0xFB
+           (?\x0E5C invalid nil)					; 0xFC
+           (?\x0E5D invalid nil)					; 0xFD
+           (?\x0E5E invalid nil)					; 0xFE
+           )))
+  (dolist (elm l)
+    (let ((char (car elm))
+          (ptype (nth 1 elm)))
+      (put-char-code-property char 'phonetic-type ptype)
+      ;; Only consonants, upper/lower vowels, tone marks and upper signs
+      ;; get a category; every other type is left out of the table.
+      (cond ((eq ptype 'consonant)
+             (modify-category-entry char ?c thai-category-table))
+            ((memq ptype '(vowel-upper vowel-lower))
+             (modify-category-entry char ?v thai-category-table)
+             ;; Give category `I' to "SARA I".
+             (when (= char ?ิ)
+               (modify-category-entry char ?I thai-category-table)))
+            ((eq ptype 'tone)
+             ;; A tone mark belongs to both `t' and `u'.
+             (modify-category-entry char ?t thai-category-table)
+             (modify-category-entry char ?u thai-category-table))
+            ((eq ptype 'sign-upper)
+             (modify-category-entry char ?u thai-category-table)
+             ;; Give category `U' to "THANTHAKHAT".
+             (when (= char ?์)
+               (modify-category-entry char ?U thai-category-table))))
+      ;; The name is nil for the `invalid' entries.
+      (put-char-code-property char 'name (nth 2 elm)))))
+
+(defun thai-compose-syllable (beg end &optional category-set string)
+  "Compose the Thai syllable between BEG and END and return its length.
+If STRING is non-nil, BEG and END are indices into STRING and the
+composition is put on STRING; otherwise they are positions in the
+current buffer.  CATEGORY-SET is the category set of the character
+at BEG; if it is nil, it is obtained with `char-category-set'."
+  (let* ((cats (or category-set
+                   (char-category-set
+                    (if string (aref string beg) (char-after beg)))))
+         (components
+          (cond
+           ;; Starting with a consonant: relative composition, which
+           ;; needs no explicit component list.
+           ((aref cats ?c) nil)
+           ;; Vowel tone sequence: the first two characters joined by
+           ;; the composition rule (Bc . Bc).
+           (string
+            (list (aref string beg) '(Bc . Bc) (aref string (1+ beg))))
+           (t
+            (list (char-after beg) '(Bc . Bc) (char-after (1+ beg)))))))
+    (if string
+        (compose-string string beg end components)
+      (compose-region beg end components))
+    (- end beg)))
+
+;;;###autoload
+(defun thai-compose-region (beg end)
+  "Compose every Thai syllable found between BEG and END.
+Interactively, operate on the region.  When called from a program,
+BEG and END are positions (integers or markers) delimiting the text."
+  (interactive "r")
+  (let ((final-point (point)))
+    (save-restriction
+      (narrow-to-region beg end)
+      (goto-char (point-min))
+      (with-category-table thai-category-table
+        (while (re-search-forward thai-composition-pattern nil t)
+          (let ((start (match-beginning 0))
+                (stop (match-end 0)))
+            ;; If point was strictly inside this syllable, it is put
+            ;; back at the end of the syllable instead.
+            (when (and (< start final-point) (< final-point stop))
+              (setq final-point stop))
+            (thai-compose-syllable start stop
+                                   (char-category-set (char-after start)))))))
+    (goto-char final-point)))
+
+;;;###autoload
+(defun thai-compose-string (string)
+  "Return STRING with its Thai syllables composed.
+The compositions are put on STRING itself, which is also the value."
+  (with-category-table thai-category-table
+    (let ((start 0))
+      ;; Each match is one syllable; the search resumes right after it.
+      (while (setq start (string-match thai-composition-pattern string start))
+        (let ((stop (match-end 0)))
+          (thai-compose-syllable start stop nil string)
+          (setq start stop)))))
+  string)
+
+;;;###autoload
+(defun thai-compose-buffer ()
+ "Compose Thai characters in the current buffer.
+The text from `point-min' to `point-max' is handed to
+`thai-compose-region'."
+ (interactive)
+ (thai-compose-region (point-min) (point-max)))
+
+;;;###autoload
+(defun thai-composition-function (gstring)
+  "Compose the Thai glyph-string GSTRING.
+A GSTRING of a single character is handed to
+`compose-gstring-for-graphic'.  Otherwise `font-shape-gstring' is
+tried first; if it returns nil, the glyphs of GSTRING are composed
+with `compose-glyph-string-relative', leaving out the last glyph
+when the last character of GSTRING is SARA AM."
+  (cond
+   ((= (lgstring-char-len gstring) 1)
+    (compose-gstring-for-graphic gstring))
+   ((font-shape-gstring gstring))
+   (t
+    (let ((limit (lgstring-glyph-len gstring))
+          (last-char (lgstring-char gstring
+                                    (1- (lgstring-char-len gstring))))
+          (count 0))
+      ;; Count the leading glyph slots that are in use, stopping at the
+      ;; first empty (nil) slot.
+      (while (and (< count limit)
+                  (lgstring-glyph gstring count))
+        (setq count (1+ count)))
+      (when (= last-char ?ำ)
+        (setq count (1- count)))
+      (compose-glyph-string-relative gstring 0 count 0.1)))))
+
+;; Thai-word-mode requires functions in the feature `thai-word'.
+(require 'thai-word)
+
+(defvar thai-word-mode-map
+  (let ((map (make-sparse-keymap)))
+    ;; Remap each standard word command to its Thai-aware counterpart.
+    (dolist (pair '((forward-word . thai-forward-word)
+                    (backward-word . thai-backward-word)
+                    (kill-word . thai-kill-word)
+                    (backward-kill-word . thai-backward-kill-word)
+                    (transpose-words . thai-transpose-words)))
+      (define-key map (vector 'remap (car pair)) (cdr pair)))
+    map)
+  "Keymap for `thai-word-mode'.")
+
+(define-minor-mode thai-word-mode
+  "Minor mode to make word-oriented commands aware of Thai words.
+With a prefix argument ARG, enable the mode if ARG is positive,
+and disable it otherwise. If called from Lisp, enable the mode
+if ARG is omitted or nil. The commands affected are
+\\[forward-word], \\[backward-word], \\[kill-word], \\[backward-kill-word],
+\\[transpose-words], and \\[fill-paragraph]."
+  :global t :group 'mule
+  ;; Category `|' enables linebreak between Thai characters.  It is set
+  ;; while the mode is on and reset (fourth argument non-nil) when the
+  ;; mode is off.
+  (modify-category-entry (make-char 'thai-tis620) ?| nil (not thai-word-mode))
+  ;; This property enables linebreak at a Thai word boundary; it is
+  ;; cleared to nil when the mode is off.
+  (put-charset-property 'thai-tis620 'fill-find-break-point-function
+                        (if thai-word-mode 'thai-fill-find-break-point)))
+
+;; Function to call on entering the Thai language environment.
+(defun setup-thai-language-environment-internal ()
+ "Turn on `thai-word-mode'.
+Meant to be called on entering the Thai language environment."
+ (thai-word-mode 1))
+
+;; Function to call on exiting the Thai language environment.
+(defun exit-thai-language-environment-internal ()
+ "Turn off `thai-word-mode'.
+Meant to be called on exiting the Thai language environment."
+ (thai-word-mode -1))
+
+;;
+(provide 'thai-util)
+
+;;; thai-util.el ends here