Commit | Line | Data |
---|---|---|
504f06a2 EZ |
1 | ;;; thai-word.el --- find Thai word boundaries |
2 | ||
5df4f04c | 3 | ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
675ae7bc KH |
4 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
5 | ;; Registration Number H14PRO021 | |
504f06a2 EZ |
6 | |
7 | ;; Author: Kenichi HANDA <handa@etl.go.jp> | |
8 | ||
9 | ;; Keywords: thai, word break, emacs | |
10 | ||
4936186e GM |
11 | ;; This file is part of GNU Emacs. |
12 | ||
13 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
504f06a2 | 14 | ;; it under the terms of the GNU General Public License as published by |
4936186e GM |
15 | ;; the Free Software Foundation, either version 3 of the License, or |
16 | ;; (at your option) any later version. | |
504f06a2 | 17 | |
4936186e | 18 | ;; GNU Emacs is distributed in the hope that it will be useful, |
504f06a2 EZ |
19 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
20 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | ;; GNU General Public License for more details. | |
22 | ||
23 | ;; You should have received a copy of the GNU General Public License | |
4936186e | 24 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
504f06a2 EZ |
25 | |
26 | ;; The Thai word list used here was taken from IBM's ICU4J project | |
27 | ;; (file `thai6.ucs', version 1.4, converted to TIS encoding, with | |
28 | ;; removal of three incorrect entries) to which the following license | |
29 | ;; applies: | |
30 | ;; | |
31 | ;; COPYRIGHT AND PERMISSION NOTICE | |
32 | ;; | |
33 | ;; | |
34 | ;; Copyright (c) 1995-2001 International Business Machines | |
35 | ;; Corporation and others | |
36 | ;; | |
37 | ;; All rights reserved. | |
38 | ;; | |
39 | ;; | |
40 | ;; Permission is hereby granted, free of charge, to any person | |
41 | ;; obtaining a copy of this software and associated documentation | |
42 | ;; files (the "Software"), to deal in the Software without | |
43 | ;; restriction, including without limitation the rights to use, | |
44 | ;; copy, modify, merge, publish, distribute, and/or sell copies of | |
45 | ;; the Software, and to permit persons to whom the Software is | |
46 | ;; furnished to do so, provided that the above copyright notice(s) | |
47 | ;; and this permission notice appear in all copies of the Software | |
48 | ;; and that both the above copyright notice(s) and this permission | |
49 | ;; notice appear in supporting documentation. | |
50 | ;; | |
51 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
52 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
53 | ;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
54 | ;; NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE | |
55 | ;; COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE | |
56 | ;; FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, | |
57 | ;; OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR | |
58 | ;; PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
59 | ;; TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
60 | ;; PERFORMANCE OF THIS SOFTWARE. | |
61 | ;; | |
62 | ;; Except as contained in this notice, the name of a copyright | |
63 | ;; holder shall not be used in advertising or otherwise to promote | |
64 | ;; the sale, use or other dealings in this Software without prior | |
65 | ;; written authorization of the copyright holder. | |
66 | ||
67 | ||
4936186e GM |
68 | ;;; Commentary: |
69 | ||
504f06a2 EZ |
70 | ;; This file implements an algorithm to find Thai word breaks using a |
71 | ;; dictionary. It is based on the C program `cttex' written by | |
72 | ;; Vuthichai Ampornaramveth <vuthi@nii.ac.jp>. | |
73 | ||
74 | ||
75 | ;; Table of Thai words. It is a nested alist (see `nested-alist-p'), | |
76 | ;; which means that you can easily index the list character by | |
77 | ;; character. | |
78 | ||
675ae7bc KH |
79 | (defvar thai-word-table |
80 | (let ((table (list 'thai-words))) | |
81 | (dolist (elt | |
82 | ;;; The following is indented this way to minimize the file size. |
504f06a2 EZ |
83 |