| 1 | ;;; thai-word.el --- find Thai word boundaries |
| 2 | |
| 3 | ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| 4 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
| 5 | ;; Registration Number H14PRO021 |
| 6 | |
| 7 | ;; Author: Kenichi HANDA <handa@etl.go.jp> |
| 8 | |
| 9 | ;; Keywords: thai, word break, emacs |
| 10 | |
| 11 | ;; This file is part of GNU Emacs. |
| 12 | |
| 13 | ;; GNU Emacs is free software: you can redistribute it and/or modify |
| 14 | ;; it under the terms of the GNU General Public License as published by |
| 15 | ;; the Free Software Foundation, either version 3 of the License, or |
| 16 | ;; (at your option) any later version. |
| 17 | |
| 18 | ;; GNU Emacs is distributed in the hope that it will be useful, |
| 19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 21 | ;; GNU General Public License for more details. |
| 22 | |
| 23 | ;; You should have received a copy of the GNU General Public License |
| 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
| 25 | |
| 26 | ;; The Thai word list used here was taken from IBM's ICU4J project |
| 27 | ;; (file `thai6.ucs', version 1.4, converted to TIS encoding, with |
| 28 | ;; removal of three incorrect entries) to which the following license |
| 29 | ;; applies: |
| 30 | ;; |
| 31 | ;; COPYRIGHT AND PERMISSION NOTICE |
| 32 | ;; |
| 33 | ;; |
| 34 | ;; Copyright (c) 1995-2001 International Business Machines |
| 35 | ;; Corporation and others |
| 36 | ;; |
| 37 | ;; All rights reserved. |
| 38 | ;; |
| 39 | ;; |
| 40 | ;; Permission is hereby granted, free of charge, to any person |
| 41 | ;; obtaining a copy of this software and associated documentation |
| 42 | ;; files (the "Software"), to deal in the Software without |
| 43 | ;; restriction, including without limitation the rights to use, |
| 44 | ;; copy, modify, merge, publish, distribute, and/or sell copies of |
| 45 | ;; the Software, and to permit persons to whom the Software is |
| 46 | ;; furnished to do so, provided that the above copyright notice(s) |
| 47 | ;; and this permission notice appear in all copies of the Software |
| 48 | ;; and that both the above copyright notice(s) and this permission |
| 49 | ;; notice appear in supporting documentation. |
| 50 | ;; |
| 51 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 52 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| 53 | ;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 54 | ;; NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE |
| 55 | ;; COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE |
| 56 | ;; FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, |
| 57 | ;; OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR |
| 58 | ;; PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER |
| 59 | ;; TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR |
| 60 | ;; PERFORMANCE OF THIS SOFTWARE. |
| 61 | ;; |
| 62 | ;; Except as contained in this notice, the name of a copyright |
| 63 | ;; holder shall not be used in advertising or otherwise to promote |
| 64 | ;; the sale, use or other dealings in this Software without prior |
| 65 | ;; written authorization of the copyright holder. |
| 66 | |
| 67 | |
| 68 | ;;; Commentary: |
| 69 | |
| 70 | ;; This file implements an algorithm to find Thai word breaks using a |
| 71 | ;; dictionary. It is based on the C program `cttex' written by |
| 72 | ;; Vuthichai Ampornaramveth <vuthi@nii.ac.jp>. |
| 73 | |
| 74 | |
| 75 | ;; Table of Thai words. It is a nested alist (see `nested-alist-p'), |
| 76 | ;; which means that you can easily index the list character by |
| 77 | ;; character. |
| 78 | |
| 79 | (defvar thai-word-table |
| 80 | (let ((table (list 'thai-words))) |
| 81 | (dolist (elt |
| 82 | ;;; The following is indented this way to minimize the size of this file. |
| 83 |