Commit | Line | Data |
---|---|---|
5d1bff20 | 1 | # eucjp-ms.awk -- Generate a translation table for eucJP-ms. |
5df4f04c | 2 | # Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
5d1bff20 KH |
3 | # National Institute of Advanced Industrial Science and Technology (AIST) |
4 | # Registration Number H13PRO009 | |
9ad5de0c | 5 | |
5d1bff20 | 6 | # This file is part of GNU Emacs. |
9ad5de0c GM |
7 | |
8 | # GNU Emacs is free software: you can redistribute it and/or modify | |
5d1bff20 | 9 | # it under the terms of the GNU General Public License as published by |
9ad5de0c GM |
10 | # the Free Software Foundation, either version 3 of the License, or |
11 | # (at your option) any later version. | |
12 | ||
5d1bff20 KH |
13 | # GNU Emacs is distributed in the hope that it will be useful, |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU General Public License for more details. | |
9ad5de0c | 17 | |
5d1bff20 | 18 | # You should have received a copy of the GNU General Public License |
9ad5de0c | 19 | # along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
5d1bff20 | 20 | |
9ad5de0c | 21 | # Commentary: |
5d1bff20 KH |
22 | |
23 | # eucJP-ms is one of the eucJP-open encodings defined at this page: | |
2c36b577 KH |
24 | # http://home.m05.itscom.net/numa/cde/ucs-conv/appendix.html |
25 | # This program reads the mapping file EUC-JP-MS (of glibc) and | |
26 | # generates the Elisp file eucjp-ms.el that defines two translation | |
27 | # tables `eucjp-ms-decode' and `eucjp-ms-encode'. | |
5d1bff20 KH |
28 | |
# Set up field splitting, the range-boundary constants and the parser
# state, then emit the opening of the generated Elisp file.
BEGIN {
    # Fields are separated by runs of spaces and/or tabs.
    FS = "[ \t][ \t]*"

    # Byte sequences, spelled the way they are compared against the
    # second field, that open (FROM) or close (TO) a target range.
    # Only the first JISX0208 range has an explicit end; the others
    # stay open until STATE is reset by a later section line.
    JISX0208_FROM1 = "/xad/xa1"
    JISX0208_TO1   = "/xad/xfc"
    JISX0208_FROM2 = "/xf5/xa1"
    JISX0212_FROM  = "/x8f/xf3/xf3"

    # STATE: 0/ignore, 1/JISX0208, 2/JISX0208 target range
    #        3/JISX0212, 4/JISX0212 target range
    state = 0

    # Preamble of the generated file; the alist opened here is filled
    # in by write_entry and closed in the END rule.
    print ";;; eucjp-ms.el -- translation table for eucJP-ms. -*- no-byte-compile: t -*-"
    print ";;; Automatically generated from /usr/share/i18n/charmaps/EUC-JP-MS.gz"
    print "(let ((map"
    print " '(;JISEXT<->UNICODE"
}
46 | ||
2c36b577 KH |
# Emit one alist entry for the current input line when it falls inside
# a target range, updating STATE at range boundaries.
#   unicode -- hex digits of the code point, as extracted by the caller.
# The byte sequence is taken from the second field ($2).  JISX0208
# entries are written as dotted pairs, JISX0212 entries as two-element
# lists; the END rule's Elisp tells them apart by that shape.
function write_entry (unicode) {
    # Does this line open a target range?
    if (state == 1 && ($2 == JISX0208_FROM1 || $2 == JISX0208_FROM2))
        state = 2;
    else if (state == 3 && $2 == JISX0212_FROM)
        state = 4;

    if (state == 2) {
        jis = $2;
        gsub("/x", "", jis);
        printf "\n (#x%s . #x%s)", jis, unicode;
        # The first JISX0208 range is closed; fall back to scanning.
        if ($2 == JISX0208_TO1)
            state = 1;
        return;
    }

    if (state == 4) {
        # Skip the first four characters of the field (the leading
        # "/x8f" in JISX0212_FROM) and keep the remaining two bytes.
        jis = substr($2, 5, 8);
        gsub("/x", "", jis);
        printf "\n (#x%s #x%s)", jis, unicode;
    }
}
67 | ||
68 | ||
# Section lines switch the parser state (see the STATE legend in BEGIN)
# and are otherwise discarded.
/^% JIS X 0208/ { state = 1; next }
/^% JIS X 0212/ { state = 3; next }
/^END CHARMAP/  { state = 0; next }
83 | ||
2c36b577 KH |
# Mapping lines.  While inside a section (STATE > 0), hand the four
# characters that follow the "<U" prefix of the first field to
# write_entry.

# Plain entry: "<Uxxxx> ..."
state > 0 && /^<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ {
    write_entry(substr($1, length("<U") + 1, 4));
}

# Entry flagged with the "%IRREVERSIBLE%" prefix; same handling, the
# code point just starts further into the field.
state > 0 && /^%IRREVERSIBLE%<U[0-9A-Z][0-9A-Z][0-9A-Z][0-9A-Z]>/ {
    write_entry(substr($1, length("%IRREVERSIBLE%<U") + 1, 4));
}
93 | ||
# Close the alist opened in BEGIN and emit the Elisp that turns it into
# the two translation tables.
END {
    print ")))"

    # First pass over MAP: mask each car with #x7F7F and replace it by
    # the result of decode-char.  Entries whose cdr is an integer (the
    # dotted pairs) use japanese-jisx0208; the others use
    # japanese-jisx0212 and get their cdr replaced by (cadr x).
    print " (mapc #'(lambda (x)"
    print " (let ((code (logand (car x) #x7F7F)))"
    print " (if (integerp (cdr x))"
    print " (setcar x (decode-char 'japanese-jisx0208 code))"
    print " (setcar x (decode-char 'japanese-jisx0212 code))"
    print " (setcdr x (cadr x)))))"
    print " map)"
    print " (define-translation-table 'eucjp-ms-decode map)"

    # Second pass: swap car and cdr of every entry in place, then
    # define the encode table from the same list.
    print " (mapc #'(lambda (x)"
    print " (let ((tmp (car x)))"
    print " (setcar x (cdr x)) (setcdr x tmp)))"
    print " map)"
    print " (define-translation-table 'eucjp-ms-encode map))"
}
21e99729 | 110 |