Commit | Line | Data |
---|---|---|
e8f2cc26 YM |
1 | ;;; uvs.el --- utility for UVS (format 14) cmap subtables in OpenType fonts. |
2 | ||
3 | ;; Copyright (C) 2014 Free Software Foundation, Inc. | |
4 | ||
5 | ;; Author: YAMAMOTO Mitsuharu <mituharu@math.s.chiba-u.ac.jp> | |
6 | ||
7 | ;; This file is part of GNU Emacs. | |
8 | ||
9 | ;; GNU Emacs is free software: you can redistribute it and/or modify | |
10 | ;; it under the terms of the GNU General Public License as published by | |
11 | ;; the Free Software Foundation, either version 3 of the License, or | |
12 | ;; (at your option) any later version. | |
13 | ||
14 | ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 | ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 | ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 | ;; GNU General Public License for more details. | |
18 | ||
19 | ;; You should have received a copy of the GNU General Public License | |
20 | ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
21 | ||
22 | ;;; Commentary: | |
23 | ||
24 | ;; To extract a C array definition of a UVS table for the Adobe-Japan1 | |
25 | ;; character collection from an IVD Sequences file, execute | |
26 | ;; $ emacs -batch -l uvs.el \ | |
27 | ;; --eval '(uvs-print-table-ivd "IVD_Sequences.txt" "Adobe-Japan1")' \ | |
28 | ;; > uvs.h | |
29 | ||
30 | ;;; Code: | |
31 | ||
32 | (defun uvs-fields-total-size (fields) | |
33 | (apply '+ (mapcar (lambda (field) (get field 'uvs-field-size)) fields))) | |
34 | ||
35 | ;;; Fields in Format 14 header. | |
36 | (defconst uvs-format-14-header-fields | |
37 | '(format length num-var-selector-records)) | |
38 | (put 'format 'uvs-field-size 2) | |
39 | (put 'length 'uvs-field-size 4) | |
40 | (put 'num-var-selector-records 'uvs-field-size 4) | |
41 | (defconst uvs-format-14-header-size | |
42 | (uvs-fields-total-size uvs-format-14-header-fields)) | |
43 | ||
44 | ;;; Fields in Variation Selector Record. | |
45 | (defconst uvs-variation-selector-record-fields | |
46 | '(var-selector default-uvs-offset non-default-uvs-offset)) | |
47 | (put 'var-selector 'uvs-field-size 3) | |
48 | (put 'default-uvs-offset 'uvs-field-size 4) | |
49 | (put 'non-default-uvs-offset 'uvs-field-size 4) | |
50 | (defconst uvs-variation-selector-record-size | |
51 | (uvs-fields-total-size uvs-variation-selector-record-fields)) | |
52 | ||
53 | ;;; Fields in Non-Default UVS Table. | |
54 | (defconst uvs-non-default-uvs-table-header-fields '(num-uvs-mappings)) | |
55 | (put 'num-uvs-mappings 'uvs-field-size 4) | |
56 | (defconst uvs-non-default-uvs-table-header-size | |
57 | (uvs-fields-total-size uvs-non-default-uvs-table-header-fields)) | |
58 | ||
59 | ;;; Fields in UVS Mapping. | |
60 | (defconst uvs-uvs-mapping-fields '(unicode-value glyph-id)) | |
61 | (put 'unicode-value 'uvs-field-size 3) | |
62 | (put 'glyph-id 'uvs-field-size 2) | |
63 | (defconst uvs-uvs-mapping-size | |
64 | (uvs-fields-total-size uvs-uvs-mapping-fields)) | |
65 | ||
66 | (defun uvs-alist-from-ivd (collection-id sequence-id-to-glyph-function) | |
67 | "Create UVS alist from IVD Sequences and COLLECTION-ID. | |
68 | The IVD (Ideographic Variation Database) Sequences are obtained | |
69 | from the contents of the current buffer, and should be in the | |
70 | form of IVD_Sequences.txt specified in Unicode Technical Standard | |
71 | #37. COLLECTION-ID is a string specifying the identifier of the | |
72 | collection to extract (e.g., \"Adobe-Japan1\"). | |
73 | SEQUENCE-ID-TO-GLYPH-FUNC is a function to convert an identifier | |
74 | string of the sequence to a glyph number. UVS alist is of the | |
75 | following form: | |
76 | ((SELECTOR1 . ((BASE11 . GLYPH11) (BASE12 . GLYPH12) ...)) | |
77 | (SELECTOR2 . ((BASE21 . GLYPH21) (BASE22 . GLYPH22) ...)) ...), | |
78 | where selectors and bases are sorted in ascending order." | |
79 | (let (uvs-alist) | |
80 | (goto-char (point-min)) | |
81 | (while (re-search-forward | |
82 | (concat "^[[:blank:]]*" | |
83 | "\\([[:xdigit:]]+\\) \\([[:xdigit:]]+\\)" | |
84 | "[[:blank:]]*;[[:blank:]]*" | |
85 | "\\(?:" (regexp-quote collection-id) "\\)" | |
86 | "[[:blank:]]*;[[:blank:]]*" | |
87 | "\\([^\n[:blank:]]+\\)" | |
88 | "[[:blank:]]*$") nil t) | |
89 | (let* ((base (string-to-number (match-string 1) 16)) | |
90 | (selector (string-to-number (match-string 2) 16)) | |
91 | (sequence-id (match-string 3)) | |
92 | (glyph (funcall sequence-id-to-glyph-function sequence-id))) | |
93 | (let ((selector-bgs (assq selector uvs-alist)) | |
94 | (base-glyph (cons base glyph))) | |
95 | (if selector-bgs | |
96 | (setcdr selector-bgs (cons base-glyph (cdr selector-bgs))) | |
97 | (push (cons selector (list base-glyph)) uvs-alist))))) | |
98 | (dolist (selector-bgs uvs-alist) | |
99 | (setcdr selector-bgs | |
100 | (sort (cdr selector-bgs) | |
101 | (lambda (bg1 bg2) (< (car bg1) (car bg2)))))) | |
102 | (sort uvs-alist (lambda (sb1 sb2) (< (car sb1) (car sb2)))))) | |
103 | ||
104 | (defun uvs-int-to-bytes (value size) | |
105 | "Convert integer VALUE to a list of SIZE bytes. | |
106 | The most significant byte comes first." | |
107 | (let (result) | |
108 | (dotimes (i size) | |
109 | (push (logand value #xff) result) | |
110 | (setq value (lsh value -8))) | |
111 | result)) | |
112 | ||
113 | (defun uvs-insert-fields-as-bytes (fields &rest values) | |
114 | "Insert VALUES for FIELDS as a sequence of bytes to the current buffer. | |
115 | VALUES and FIELDS are lists of integers and field symbols, | |
116 | respectively. Byte length of each value is determined by the | |
117 | 'uvs-field-size' property of the corresponding field." | |
118 | (while fields | |
119 | (let ((field (car fields)) | |
120 | (value (car values))) | |
121 | (insert (apply 'unibyte-string | |
122 | (uvs-int-to-bytes value (get field 'uvs-field-size)))) | |
123 | (setq fields (cdr fields) values (cdr values))))) | |
124 | ||
125 | (defun uvs-insert-alist-as-bytes (uvs-alist) | |
126 | "Insert UVS-ALIST as a sequence of bytes to the current buffer." | |
127 | (let* ((nrecords (length uvs-alist)) ; # of selectors | |
128 | (total-nmappings | |
129 | (apply '+ (mapcar | |
130 | (lambda (selector-bgs) (length (cdr selector-bgs))) | |
131 | uvs-alist))) | |
132 | (non-default-offset | |
133 | (+ uvs-format-14-header-size | |
134 | (* uvs-variation-selector-record-size nrecords)))) | |
135 | (uvs-insert-fields-as-bytes uvs-format-14-header-fields | |
136 | 14 | |
137 | (+ uvs-format-14-header-size | |
138 | (* uvs-variation-selector-record-size | |
139 | nrecords) | |
140 | (* uvs-non-default-uvs-table-header-size | |
141 | nrecords) | |
142 | (* uvs-uvs-mapping-size total-nmappings)) | |
143 | nrecords) | |
144 | (dolist (selector-bgs uvs-alist) | |
145 | (uvs-insert-fields-as-bytes uvs-variation-selector-record-fields | |
146 | (car selector-bgs) | |
147 | 0 ; No Default UVS Tables. | |
148 | non-default-offset) | |
149 | (setq non-default-offset | |
150 | (+ non-default-offset | |
151 | uvs-non-default-uvs-table-header-size | |
152 | (* (length (cdr selector-bgs)) uvs-uvs-mapping-size)))) | |
153 | (dolist (selector-bgs uvs-alist) | |
154 | (uvs-insert-fields-as-bytes uvs-non-default-uvs-table-header-fields | |
155 | (length (cdr selector-bgs))) | |
156 | (dolist (base-glyph (cdr selector-bgs)) | |
157 | (uvs-insert-fields-as-bytes uvs-uvs-mapping-fields | |
158 | (car base-glyph) | |
159 | (cdr base-glyph)))))) | |
160 | ||
161 | (defun uvs-dump (&optional bytes-per-line separator separator-eol line-prefix) | |
162 | "Print the current buffer as in representation of C array contents." | |
163 | (or bytes-per-line (setq bytes-per-line 8)) | |
164 | (or separator (setq separator ", ")) | |
165 | (or separator-eol (setq separator-eol ",")) | |
166 | (or line-prefix (setq line-prefix " ")) | |
167 | (goto-char (point-min)) | |
168 | (while (> (- (point-max) (point)) bytes-per-line) | |
169 | (princ line-prefix) | |
170 | (princ (mapconcat (lambda (byte) (format "0x%02x" byte)) | |
171 | (string-to-unibyte | |
172 | (buffer-substring (point) (+ (point) bytes-per-line))) | |
173 | separator)) | |
174 | (princ separator-eol) | |
175 | (terpri) | |
176 | (forward-char bytes-per-line)) | |
177 | (princ line-prefix) | |
178 | (princ (mapconcat (lambda (byte) (format "0x%02x" byte)) | |
179 | (string-to-unibyte | |
180 | (buffer-substring (point) (point-max))) | |
181 | separator)) | |
182 | (terpri)) | |
183 | ||
184 | (defun uvs-print-table-ivd (filename collection-id | |
185 | &optional sequence-id-to-glyph-func) | |
186 | "Print a C array definition of a UVS table for IVD Sequences. | |
187 | FILENAME specifies the IVD Sequences file. COLLECTION-ID is a | |
188 | string specifying the identifier of the collection to | |
189 | extract (e.g., \"Adobe-Japan1\"). SEQUENCE-ID-TO-GLYPH-FUNC is a | |
190 | function to convert an identifier string of the sequence to a | |
191 | glyph number, and nil means to convert \"CID\\+[0-9]+\" to the | |
192 | corresponding number." | |
193 | (or sequence-id-to-glyph-func | |
194 | (setq sequence-id-to-glyph-func | |
195 | (lambda (sequence-id) | |
196 | (string-match "\\`CID\\+\\([[:digit:]]+\\)\\'" sequence-id) | |
197 | (string-to-number (match-string 1 sequence-id))))) | |
198 | (let ((uvs-alist | |
199 | (with-temp-buffer | |
200 | (insert-file-contents filename) | |
201 | (setq uvs-alist (uvs-alist-from-ivd collection-id | |
202 | sequence-id-to-glyph-func))))) | |
203 | (princ | |
204 | (format "static const unsigned char mac_uvs_table_%s_bytes[] =\n {\n" | |
205 | (replace-regexp-in-string "[^_[:alnum:]]" "_" | |
206 | (downcase collection-id)))) | |
207 | (with-temp-buffer | |
208 | (set-buffer-multibyte nil) | |
209 | (uvs-insert-alist-as-bytes uvs-alist) | |
210 | (uvs-dump)) | |
211 | (princ " };\n"))) | |
212 | ||
213 | ;;; uvs.el ends here |