Commit | Line | Data |
---|---|---|
504f06a2 EZ |
1 | ;;; thai-word.el --- find Thai word boundaries |
2 | ||
38141d20 | 3 | ;; Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 |
675ae7bc KH |
4 | ;; National Institute of Advanced Industrial Science and Technology (AIST) |
5 | ;; Registration Number H14PRO021 | |
504f06a2 EZ |
6 | |
7 | ;; Author: Kenichi HANDA <handa@etl.go.jp> | |
8 | ||
9 | ;; Keywords: thai, word break, emacs | |
10 | ||
11 | ;; This program is free software; you can redistribute it and/or modify | |
12 | ;; it under the terms of the GNU General Public License as published by | |
d7142f3e | 13 | ;; the Free Software Foundation; either version 3, or (at your option) |
504f06a2 EZ |
14 | ;; any later version. |
15 | ||
16 | ;; This program is distributed in the hope that it will be useful, | |
17 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 | ;; GNU General Public License for more details. | |
20 | ||
21 | ;; You should have received a copy of the GNU General Public License | |
22 | ;; along with GNU Emacs; see the file COPYING. If not, write to | |
3ef97fb6 LK |
23 | ;; the Free Software Foundation, 51 Franklin Street, Fifth Floor, |
24 | ;; Boston, MA 02110-1301, USA. | |
504f06a2 EZ |
25 | |
26 | ;; The Thai word list used here was taken from IBM's ICU4J project | |
27 | ;; (file `thai6.ucs', version 1.4, converted to TIS encoding, with | |
28 | ;; removal of three incorrect entries) to which the following license | |
29 | ;; applies: | |
30 | ;; | |
31 | ;; COPYRIGHT AND PERMISSION NOTICE | |
32 | ;; | |
33 | ;; | |
34 | ;; Copyright (c) 1995-2001 International Business Machines | |
35 | ;; Corporation and others | |
36 | ;; | |
37 | ;; All rights reserved. | |
38 | ;; | |
39 | ;; | |
40 | ;; Permission is hereby granted, free of charge, to any person | |
41 | ;; obtaining a copy of this software and associated documentation | |
42 | ;; files (the "Software"), to deal in the Software without | |
43 | ;; restriction, including without limitation the rights to use, | |
44 | ;; copy, modify, merge, publish, distribute, and/or sell copies of | |
45 | ;; the Software, and to permit persons to whom the Software is | |
46 | ;; furnished to do so, provided that the above copyright notice(s) | |
47 | ;; and this permission notice appear in all copies of the Software | |
48 | ;; and that both the above copyright notice(s) and this permission | |
49 | ;; notice appear in supporting documentation. | |
50 | ;; | |
51 | ;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
52 | ;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
53 | ;; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
54 | ;; NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE | |
55 | ;; COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE LIABLE | |
56 | ;; FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, | |
57 | ;; OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR | |
58 | ;; PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER | |
59 | ;; TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR | |
60 | ;; PERFORMANCE OF THIS SOFTWARE. | |
61 | ;; | |
62 | ;; Except as contained in this notice, the name of a copyright | |
63 | ;; holder shall not be used in advertising or otherwise to promote | |
64 | ;; the sale, use or other dealings in this Software without prior | |
65 | ;; written authorization of the copyright holder. | |
66 | ||
67 | ||
68 | ;; This file implements an algorithm to find Thai word breaks using a | |
69 | ;; dictionary. It is based on the C program `cttex' written by | |
70 | ;; Vuthichai Ampornaramveth <vuthi@nii.ac.jp>. | |
71 | ||
72 | ||
73 | ;; Table of Thai words. It is a nested alist (see `nested-alist-p'), | |
74 | ;; which means that you can easily index the list character by | |
75 | ;; character. | |
76 | ||
675ae7bc KH |
77 | (defvar thai-word-table |
78 | (let ((table (list 'thai-words))) | |
79 | (dolist (elt | |
80 | ;;; The following is indented this way to minimize the size of this file. | |
504f06a2 EZ |
81 |