Fix ptrdiff_t/EMACS_INT confusion.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
73b0cd50 2 Copyright (C) 2001-2011 Free Software Foundation, Inc.
7976eda0 3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5df4f04c 4 2005, 2006, 2007, 2008, 2009, 2010, 2011
ce03bf76
KH
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
4ed46869 7
327719ee 8 Copyright (C) 2003, 2004
3263d5a2
KH
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
4ed46869 11
369314dc
KH
12This file is part of GNU Emacs.
13
9ec0b715 14GNU Emacs is free software: you can redistribute it and/or modify
369314dc 15it under the terms of the GNU General Public License as published by
9ec0b715
GM
16the Free Software Foundation, either version 3 of the License, or
17(at your option) any later version.
4ed46869 18
369314dc
KH
19GNU Emacs is distributed in the hope that it will be useful,
20but WITHOUT ANY WARRANTY; without even the implied warranty of
21MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22GNU General Public License for more details.
4ed46869 23
369314dc 24You should have received a copy of the GNU General Public License
9ec0b715 25along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 26
68c45bf0 27#include <config.h>
68c45bf0 28
4ed46869 29#include <stdio.h>
3263d5a2
KH
30#include <unistd.h>
31#include <ctype.h>
726929c4 32#include <limits.h>
4ed46869 33#include <sys/types.h>
d7306fe6 34#include <setjmp.h>
4ed46869 35#include "lisp.h"
3263d5a2 36#include "character.h"
4ed46869
KH
37#include "charset.h"
38#include "coding.h"
fc6b09bf 39#include "disptab.h"
3263d5a2 40#include "buffer.h"
4ed46869 41
04c2f2c5 42/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 43
3263d5a2 44 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 45 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
4ed46869 49
3263d5a2
KH
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
04c2f2c5 53 charset_table as a struct charset.
4ed46869 54
3263d5a2 55*/
4ed46869 56
3263d5a2
KH
57/* Hash table that contains attributes of each charset. Keys are
58 charset symbols, and values are vectors of charset attributes. */
59Lisp_Object Vcharset_hash_table;
4ed46869 60
3263d5a2
KH
61/* Table of struct charset. */
62struct charset *charset_table;
4ed46869 63
0065d054 64static ptrdiff_t charset_table_size;
5af5dd92 65static int charset_table_used;
4ed46869 66
3263d5a2 67Lisp_Object Qcharsetp;
4ed46869 68
3263d5a2
KH
69/* Special charset symbols. */
70Lisp_Object Qascii;
955cbe7b
PE
71static Lisp_Object Qeight_bit;
72static Lisp_Object Qiso_8859_1;
73static Lisp_Object Qunicode;
74static Lisp_Object Qemacs;
4ed46869 75
3263d5a2
KH
76/* The corresponding charsets. */
77int charset_ascii;
2fe1edd1 78int charset_eight_bit;
a2cb4e63 79static int charset_iso_8859_1;
3263d5a2 80int charset_unicode;
a2cb4e63 81static int charset_emacs;
b0e3cf2b 82
7c7dceee
KH
83/* The other special charsets. */
84int charset_jisx0201_roman;
85int charset_jisx0208_1978;
86int charset_jisx0208;
d32320c4 87int charset_ksc5601;
c1a08b4c 88
3263d5a2 89/* Value of charset attribute `charset-iso-plane'. */
955cbe7b 90static Lisp_Object Qgl, Qgr;
c1a08b4c 91
d1a04588
KH
92/* Charset of unibyte characters. */
93int charset_unibyte;
4ed46869 94
3263d5a2
KH
95/* List of charsets ordered by the priority. */
96Lisp_Object Vcharset_ordered_list;
4ed46869 97
6a9c90ec
KH
98/* Sub-list of Vcharset_ordered_list that contains all non-preferred
99 charsets. */
100Lisp_Object Vcharset_non_preferred_head;
101
dbbb237d 102/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 103 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
104 -1. */
105unsigned short charset_ordered_list_tick;
4ed46869 106
3263d5a2
KH
107/* List of iso-2022 charsets. */
108Lisp_Object Viso_2022_charset_list;
35e623fb 109
3263d5a2
KH
110/* List of emacs-mule charsets. */
111Lisp_Object Vemacs_mule_charset_list;
112
b84ae584 113int emacs_mule_charset[256];
4ed46869
KH
114
115/* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
117int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
118
69f8de5b
KH
119#define CODE_POINT_TO_INDEX(charset, code) \
120 ((charset)->code_linear_p \
121 ? (code) - (charset)->min_code \
122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
126 ? (((((code) >> 24) - (charset)->code_space[12]) \
127 * (charset)->code_space[11]) \
128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
129 * (charset)->code_space[7]) \
130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
131 * (charset)->code_space[3]) \
820ee249
KH
132 + (((code) & 0xFF) - (charset)->code_space[0]) \
133 - ((charset)->char_index_offset)) \
3263d5a2
KH
134 : -1)
135
136
137/* Convert the character index IDX to code-point CODE for CHARSET.
138 It is assumed that IDX is in a valid range. */
139
820ee249
KH
140#define INDEX_TO_CODE_POINT(charset, idx) \
141 ((charset)->code_linear_p \
142 ? (idx) + (charset)->min_code \
143 : (idx += (charset)->char_index_offset, \
144 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
147 << 8) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
150 << 16) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
152 << 24))))
4cf9710d 153
ecca2aad
KH
154/* Structure to hold mapping tables for a charset. Used by temacs
155 invoked for dumping. */
8a73a704 156
ecca2aad
KH
157static struct
158{
159 /* The current charset for which the following tables are setup. */
160 struct charset *current;
161
162 /* 1 iff the following table is used for encoder. */
163 short for_encoder;
164
165 /* When the following table is used for encoding, mininum and
166 maxinum character of the current charset. */
167 int min_char, max_char;
168
169 /* A Unicode character correspoinding to the code indice 0 (i.e. the
170 minimum code-point) of the current charset, or -1 if the code
171 indice 0 is not a Unicode character. This is checked when
172 table.encoder[CHAR] is zero. */
173 int zero_index_char;
174
175 union {
176 /* Table mapping code-indices (not code-points) of the current
177 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
178 doesn't belong to the current charset. */
179 int decoder[0x10000];
180 /* Table mapping Unicode characters to code-indices of the current
181 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
182 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
183 (0x20000..0x2FFFF). Note that there is no charset map that
184 uses both SMP and SIP. */
185 unsigned short encoder[0x20000];
186 } table;
187} *temp_charset_work;
188
189#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
190 do { \
191 if ((CODE) == 0) \
192 temp_charset_work->zero_index_char = (C); \
193 else if ((C) < 0x20000) \
194 temp_charset_work->table.encoder[(C)] = (CODE); \
195 else \
196 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
197 } while (0)
198
199#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
200 ((C) == temp_charset_work->zero_index_char ? 0 \
201 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
202 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
203 : temp_charset_work->table.encoder[(C) - 0x10000] \
204 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
205
206#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
207 (temp_charset_work->table.decoder[(CODE)] = (C))
208
209#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
210 (temp_charset_work->table.decoder[(CODE)])
046b1f03 211\f
93bcb785 212
e9ce014c
KH
213/* Set to 1 to warn that a charset map is loaded and thus a buffer
214 text and a string data may be relocated. */
3263d5a2 215int charset_map_loaded;
4ed46869 216
e9ce014c 217struct charset_map_entries
4ed46869 218{
e9ce014c
KH
219 struct {
220 unsigned from, to;
221 int c;
222 } entry[0x10000];
223 struct charset_map_entries *next;
224};
225
ecca2aad
KH
226/* Load the mapping information of CHARSET from ENTRIES for
227 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
228 encoding (CONTROL_FLAG == 2).
229
230 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
231 and CHARSET->fast_map.
232
233 If CONTROL_FLAG is 1, setup the following tables according to
234 CHARSET->method and inhibit_load_charset_map.
4cf9710d 235
ecca2aad
KH
236 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
237 ----------------------+--------------------+---------------------------
238 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
239 ----------------------+--------------------+---------------------------
240 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 241
ecca2aad 242 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 243
ecca2aad
KH
244 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
245 ----------------------+--------------------+---------------------------
246 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
247 ----------------------+--------------------+--------------------------
248 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
249*/
4ed46869 250
3263d5a2 251static void
971de7fb 252load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 253{
89ef49df 254 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
3263d5a2
KH
255 unsigned max_code = CHARSET_MAX_CODE (charset);
256 int ascii_compatible_p = charset->ascii_compatible_p;
257 int min_char, max_char, nonascii_min_char;
3263d5a2 258 int i;
3263d5a2 259 unsigned char *fast_map = charset->fast_map;
99529c2c 260
e9ce014c
KH
261 if (n_entries <= 0)
262 return;
263
ecca2aad 264 if (control_flag)
8ac5a9cc 265 {
ecca2aad
KH
266 if (! inhibit_load_charset_map)
267 {
268 if (control_flag == 1)
269 {
270 if (charset->method == CHARSET_METHOD_MAP)
271 {
272 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 273
ecca2aad
KH
274 vec = CHARSET_DECODER (charset)
275 = Fmake_vector (make_number (n), make_number (-1));
276 }
277 else
278 {
279 char_table_set_range (Vchar_unify_table,
280 charset->min_char, charset->max_char,
281 Qnil);
282 }
283 }
284 else
285 {
286 table = Fmake_char_table (Qnil, Qnil);
287 if (charset->method == CHARSET_METHOD_MAP)
288 CHARSET_ENCODER (charset) = table;
289 else
290 CHARSET_DEUNIFIER (charset) = table;
291 }
292 }
293 else
294 {
295 if (! temp_charset_work)
546961a9 296 temp_charset_work = xmalloc (sizeof (*temp_charset_work));
ecca2aad
KH
297 if (control_flag == 1)
298 {
299 memset (temp_charset_work->table.decoder, -1,
300 sizeof (int) * 0x10000);
ecca2aad
KH
301 }
302 else
303 {
304 memset (temp_charset_work->table.encoder, 0,
305 sizeof (unsigned short) * 0x20000);
306 temp_charset_work->zero_index_char = -1;
307 }
308 temp_charset_work->current = charset;
309 temp_charset_work->for_encoder = (control_flag == 2);
310 control_flag += 2;
311 }
3263d5a2 312 charset_map_loaded = 1;
2e344af3 313 }
6662e69b 314
e9ce014c 315 min_char = max_char = entries->entry[0].c;
3263d5a2 316 nonascii_min_char = MAX_CHAR;
e9ce014c 317 for (i = 0; i < n_entries; i++)
2e344af3 318 {
e9ce014c 319 unsigned from, to;
3befa583 320 int from_index, to_index, lim_index;
3b4f4446 321 int from_c, to_c;
e9ce014c 322 int idx = i % 0x10000;
3263d5a2 323
e9ce014c
KH
324 if (i > 0 && idx == 0)
325 entries = entries->next;
326 from = entries->entry[idx].from;
327 to = entries->entry[idx].to;
3b4f4446
KH
328 from_c = entries->entry[idx].c;
329 from_index = CODE_POINT_TO_INDEX (charset, from);
330 if (from == to)
6662e69b 331 {
3b4f4446
KH
332 to_index = from_index;
333 to_c = from_c;
6662e69b 334 }
3b4f4446 335 else
6662e69b 336 {
3b4f4446
KH
337 to_index = CODE_POINT_TO_INDEX (charset, to);
338 to_c = from_c + (to_index - from_index);
6662e69b 339 }
3b4f4446
KH
340 if (from_index < 0 || to_index < 0)
341 continue;
3befa583 342 lim_index = to_index + 1;
3263d5a2 343
ecca2aad
KH
344 if (to_c > max_char)
345 max_char = to_c;
346 else if (from_c < min_char)
347 min_char = from_c;
6662e69b 348
ecca2aad
KH
349 if (control_flag == 1)
350 {
351 if (charset->method == CHARSET_METHOD_MAP)
3befa583 352 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
353 ASET (vec, from_index, make_number (from_c));
354 else
3befa583 355 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
356 CHAR_TABLE_SET (Vchar_unify_table,
357 CHARSET_CODE_OFFSET (charset) + from_index,
358 make_number (from_c));
359 }
360 else if (control_flag == 2)
361 {
362 if (charset->method == CHARSET_METHOD_MAP
363 && CHARSET_COMPACT_CODES_P (charset))
3befa583 364 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
365 {
366 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
367
368 if (NILP (CHAR_TABLE_REF (table, from_c)))
369 CHAR_TABLE_SET (table, from_c, make_number (code));
370 }
371 else
3befa583 372 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
373 {
374 if (NILP (CHAR_TABLE_REF (table, from_c)))
375 CHAR_TABLE_SET (table, from_c, make_number (from_index));
376 }
377 }
378 else if (control_flag == 3)
3befa583 379 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
380 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
381 else if (control_flag == 4)
3befa583 382 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
383 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
384 else /* control_flag == 0 */
385 {
3b4f4446
KH
386 if (ascii_compatible_p)
387 {
388 if (! ASCII_BYTE_P (from_c))
389 {
390 if (from_c < nonascii_min_char)
391 nonascii_min_char = from_c;
392 }
393 else if (! ASCII_BYTE_P (to_c))
394 {
395 nonascii_min_char = 0x80;
396 }
397 }
177c0ea7 398
ecca2aad
KH
399 for (; from_c <= to_c; from_c++)
400 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 401 }
8ac5a9cc 402 }
3263d5a2 403
ecca2aad 404 if (control_flag == 0)
4ed46869 405 {
3263d5a2
KH
406 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
407 ? nonascii_min_char : min_char);
408 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 409 }
ecca2aad
KH
410 else if (control_flag == 4)
411 {
412 temp_charset_work->min_char = min_char;
413 temp_charset_work->max_char = max_char;
414 }
4ed46869
KH
415}
416
12bcae05 417
3263d5a2 418/* Read a hexadecimal number (preceded by "0x") from the file FP while
3b59c351 419 paying attention to comment character '#'. */
12bcae05 420
55d4c1b2 421static inline unsigned
caeeedc1 422read_hex (FILE *fp, int *eof, int *overflow)
12bcae05 423{
3263d5a2
KH
424 int c;
425 unsigned n;
12bcae05 426
3263d5a2
KH
427 while ((c = getc (fp)) != EOF)
428 {
69f8de5b 429 if (c == '#')
3263d5a2
KH
430 {
431 while ((c = getc (fp)) != EOF && c != '\n');
432 }
433 else if (c == '0')
434 {
435 if ((c = getc (fp)) == EOF || c == 'x')
436 break;
437 }
8f924df7 438 }
3263d5a2
KH
439 if (c == EOF)
440 {
441 *eof = 1;
442 return 0;
443 }
3263d5a2 444 n = 0;
caeeedc1
PE
445 while (isxdigit (c = getc (fp)))
446 {
447 if (UINT_MAX >> 4 < n)
448 *overflow = 1;
3263d5a2 449 n = ((n << 4)
caeeedc1
PE
450 | (c - ('0' <= c && c <= '9' ? '0'
451 : 'A' <= c && c <= 'F' ? 'A' - 10
452 : 'a' - 10)));
453 }
e9ce014c
KH
454 if (c != EOF)
455 ungetc (c, fp);
3263d5a2
KH
456 return n;
457}
12bcae05 458
3263d5a2 459/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
460 Each line of MAPFILE has this form
461 0xAAAA 0xCCCC
462 where 0xAAAA is a code-point and 0xCCCC is the corresponding
463 character code, or this form
464 0xAAAA-0xBBBB 0xCCCC
465 where 0xAAAA and 0xBBBB are code-points specifying a range, and
466 0xCCCC is the first character code of the range.
4ed46869 467
3263d5a2
KH
468 The returned vector has this form:
469 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 470 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
471 range.
472
18a10a21
JB
473 Note that this function uses `openp' to open MAPFILE but ignores
474 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 475
e9ce014c 476static void
971de7fb 477load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
4ed46869 478{
e9ce014c
KH
479 unsigned min_code = CHARSET_MIN_CODE (charset);
480 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
481 int fd;
482 FILE *fp;
3263d5a2 483 Lisp_Object suffixes;
e9ce014c 484 struct charset_map_entries *head, *entries;
d311d28c
PE
485 int n_entries;
486 ptrdiff_t count;
a2f3eb19 487 USE_SAFE_ALLOCA;
4ed46869 488
3263d5a2
KH
489 suffixes = Fcons (build_string (".map"),
490 Fcons (build_string (".TXT"), Qnil));
4ed46869 491
a2f3eb19 492 count = SPECPDL_INDEX ();
dde2559c 493 specbind (Qfile_name_handler_alist, Qnil);
4beef065 494 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 495 unbind_to (count, Qnil);
3263d5a2
KH
496 if (fd < 0
497 || ! (fp = fdopen (fd, "r")))
5fdb398c 498 error ("Failure in loading charset map: %s", SDATA (mapfile));
4ed46869 499
a2f3eb19
CY
500 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
501 large (larger than MAX_ALLOCA). */
502 SAFE_ALLOCA (head, struct charset_map_entries *,
503 sizeof (struct charset_map_entries));
504 entries = head;
72af86bd 505 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 506
e9ce014c 507 n_entries = 0;
3263d5a2
KH
508 while (1)
509 {
caeeedc1 510 unsigned from, to, c;
e9ce014c 511 int idx;
caeeedc1 512 int eof = 0, overflow = 0;
4ed46869 513
caeeedc1 514 from = read_hex (fp, &eof, &overflow);
3263d5a2
KH
515 if (eof)
516 break;
e9ce014c 517 if (getc (fp) == '-')
caeeedc1 518 to = read_hex (fp, &eof, &overflow);
e9ce014c
KH
519 else
520 to = from;
caeeedc1
PE
521 if (eof)
522 break;
523 c = read_hex (fp, &eof, &overflow);
524 if (eof)
525 break;
ac4137cc 526
caeeedc1
PE
527 if (overflow)
528 continue;
e9ce014c
KH
529 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
530 continue;
ac4137cc 531
e9ce014c 532 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 533 {
a2f3eb19
CY
534 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
535 sizeof (struct charset_map_entries));
e9ce014c 536 entries = entries->next;
72af86bd 537 memset (entries, 0, sizeof (struct charset_map_entries));
d311d28c 538 n_entries = 0;
3263d5a2 539 }
d311d28c 540 idx = n_entries;
e9ce014c
KH
541 entries->entry[idx].from = from;
542 entries->entry[idx].to = to;
543 entries->entry[idx].c = c;
544 n_entries++;
3263d5a2
KH
545 }
546 fclose (fp);
177c0ea7 547
e9ce014c 548 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 549 SAFE_FREE ();
4ed46869
KH
550}
551
e9ce014c 552static void
971de7fb 553load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 554{
e9ce014c
KH
555 unsigned min_code = CHARSET_MIN_CODE (charset);
556 unsigned max_code = CHARSET_MAX_CODE (charset);
557 struct charset_map_entries *head, *entries;
558 int n_entries;
559 int len = ASIZE (vec);
560 int i;
a2f3eb19 561 USE_SAFE_ALLOCA;
23d2a7f1 562
e9ce014c 563 if (len % 2 == 1)
3263d5a2 564 {
e9ce014c
KH
565 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
566 return;
3263d5a2 567 }
35e623fb 568
a2f3eb19
CY
569 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
570 large (larger than MAX_ALLOCA). */
571 SAFE_ALLOCA (head, struct charset_map_entries *,
572 sizeof (struct charset_map_entries));
573 entries = head;
72af86bd 574 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 575
e9ce014c
KH
576 n_entries = 0;
577 for (i = 0; i < len; i += 2)
35e623fb 578 {
e9ce014c
KH
579 Lisp_Object val, val2;
580 unsigned from, to;
d311d28c 581 EMACS_INT c;
e9ce014c 582 int idx;
d2665018 583
e9ce014c
KH
584 val = AREF (vec, i);
585 if (CONSP (val))
bbf12bb3 586 {
e9ce014c
KH
587 val2 = XCDR (val);
588 val = XCAR (val);
e9ce014c
KH
589 from = XFASTINT (val);
590 to = XFASTINT (val2);
bbf12bb3 591 }
e9ce014c 592 else
d311d28c 593 from = to = XFASTINT (val);
e9ce014c
KH
594 val = AREF (vec, i + 1);
595 CHECK_NATNUM (val);
596 c = XFASTINT (val);
76d7b829 597
e9ce014c
KH
598 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
599 continue;
76d7b829 600
dcc694d7 601 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 602 {
a2f3eb19
CY
603 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
604 sizeof (struct charset_map_entries));
e9ce014c 605 entries = entries->next;
72af86bd 606 memset (entries, 0, sizeof (struct charset_map_entries));
e9ce014c
KH
607 }
608 idx = n_entries % 0x10000;
609 entries->entry[idx].from = from;
610 entries->entry[idx].to = to;
611 entries->entry[idx].c = c;
612 n_entries++;
613 }
76d7b829 614
e9ce014c 615 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 616 SAFE_FREE ();
ac4137cc
KH
617}
618
ecca2aad
KH
619
620/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
621 map it is (see the comment of load_charset_map for the detail). */
622
3263d5a2 623static void
971de7fb 624load_charset (struct charset *charset, int control_flag)
76d7b829 625{
ecca2aad 626 Lisp_Object map;
76d7b829 627
ecca2aad
KH
628 if (inhibit_load_charset_map
629 && temp_charset_work
630 && charset == temp_charset_work->current
56f00ed2 631 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
632 return;
633
634 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
635 map = CHARSET_MAP (charset);
0ac2c299
PE
636 else
637 {
638 if (! CHARSET_UNIFIED_P (charset))
639 abort ();
640 map = CHARSET_UNIFY_MAP (charset);
641 }
ecca2aad
KH
642 if (STRINGP (map))
643 load_charset_map_from_file (charset, map, control_flag);
644 else
645 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 646}
76d7b829 647
3263d5a2
KH
648
649DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
650 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
5842a27b 651 (Lisp_Object object)
23d2a7f1 652{
3263d5a2 653 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
654}
655
4ed46869 656
a2cb4e63
PE
657static void
658map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
659 Lisp_Object function, Lisp_Object arg,
660 unsigned int from, unsigned int to)
ecca2aad
KH
661{
662 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
663 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
664 Lisp_Object range;
665 int c, stop;
666 struct gcpro gcpro1;
667
668 range = Fcons (Qnil, Qnil);
669 GCPRO1 (range);
670
671 c = temp_charset_work->min_char;
672 stop = (temp_charset_work->max_char < 0x20000
673 ? temp_charset_work->max_char : 0xFFFF);
51b59d79 674
ecca2aad
KH
675 while (1)
676 {
f6095868 677 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
ecca2aad 678
f6095868 679 if (idx >= from_idx && idx <= to_idx)
ecca2aad
KH
680 {
681 if (NILP (XCAR (range)))
682 XSETCAR (range, make_number (c));
683 }
684 else if (! NILP (XCAR (range)))
685 {
686 XSETCDR (range, make_number (c - 1));
687 if (c_function)
688 (*c_function) (arg, range);
689 else
690 call2 (function, range, arg);
691 XSETCAR (range, Qnil);
692 }
693 if (c == stop)
694 {
695 if (c == temp_charset_work->max_char)
696 {
697 if (! NILP (XCAR (range)))
698 {
699 XSETCDR (range, make_number (c));
700 if (c_function)
701 (*c_function) (arg, range);
702 else
703 call2 (function, range, arg);
704 }
705 break;
706 }
707 c = 0x1FFFF;
708 stop = temp_charset_work->max_char;
709 }
710 c++;
711 }
c542407d 712 UNGCPRO;
ecca2aad
KH
713}
714
4ed46869 715void
6f704c76
DN
716map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
717 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
4ed46869 718{
3263d5a2 719 Lisp_Object range;
374c5cfd 720 int partial;
3263d5a2 721
374c5cfd
KH
722 partial = (from > CHARSET_MIN_CODE (charset)
723 || to < CHARSET_MAX_CODE (charset));
724
3263d5a2 725 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 726 {
374c5cfd
KH
727 int from_idx = CODE_POINT_TO_INDEX (charset, from);
728 int to_idx = CODE_POINT_TO_INDEX (charset, to);
729 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
730 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
731
ecca2aad
KH
732 if (CHARSET_UNIFIED_P (charset))
733 {
734 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
735 load_charset (charset, 2);
736 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
737 map_char_table_for_charset (c_function, function,
738 CHARSET_DEUNIFIER (charset), arg,
739 partial ? charset : NULL, from, to);
740 else
741 map_charset_for_dump (c_function, function, arg, from, to);
742 }
743
374c5cfd 744 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 745 if (NILP (function))
5af5dd92 746 (*c_function) (arg, range);
3263d5a2
KH
747 else
748 call2 (function, range, arg);
c83ef371 749 }
3263d5a2
KH
750 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
751 {
752 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_ENCODER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 760 }
374c5cfd 761 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 762 {
374c5cfd
KH
763 Lisp_Object subset_info;
764 int offset;
765
766 subset_info = CHARSET_SUBSET (charset);
767 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
768 offset = XINT (AREF (subset_info, 3));
769 from -= offset;
770 if (from < XFASTINT (AREF (subset_info, 1)))
771 from = XFASTINT (AREF (subset_info, 1));
772 to -= offset;
773 if (to > XFASTINT (AREF (subset_info, 2)))
774 to = XFASTINT (AREF (subset_info, 2));
775 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 776 }
374c5cfd
KH
777 else /* i.e. CHARSET_METHOD_SUPERSET */
778 {
779 Lisp_Object parents;
4ed46869 780
374c5cfd
KH
781 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
782 parents = XCDR (parents))
bbf12bb3 783 {
374c5cfd
KH
784 int offset;
785 unsigned this_from, this_to;
786
787 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
788 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
789 this_from = from > offset ? from - offset : 0;
790 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
791 if (this_from < CHARSET_MIN_CODE (charset))
792 this_from = CHARSET_MIN_CODE (charset);
793 if (this_to > CHARSET_MAX_CODE (charset))
794 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
795 map_charset_chars (c_function, function, arg, charset,
796 this_from, this_to);
bbf12bb3 797 }
35e623fb 798 }
4ed46869
KH
799}
800
374c5cfd 801DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 802 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 803FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 804argument ARG.
4ed46869 805
374c5cfd
KH
806RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
807characters contained in CHARSET.
4ed46869 808
374c5cfd 809The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 810range of code points (in CHARSET) of target characters. */)
5842a27b 811 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
4ed46869 812{
374c5cfd 813 struct charset *cs;
16fed1fc 814 unsigned from, to;
4ed46869 815
374c5cfd
KH
816 CHECK_CHARSET_GET_CHARSET (charset, cs);
817 if (NILP (from_code))
16fed1fc 818 from = CHARSET_MIN_CODE (cs);
970b7474 819 else
4ed46869 820 {
970b7474
KH
821 from = XINT (from_code);
822 if (from < CHARSET_MIN_CODE (cs))
823 from = CHARSET_MIN_CODE (cs);
4ed46869 824 }
374c5cfd 825 if (NILP (to_code))
970b7474 826 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
827 else
828 {
970b7474
KH
829 to = XINT (to_code);
830 if (to > CHARSET_MAX_CODE (cs))
831 to = CHARSET_MAX_CODE (cs);
4ed46869 832 }
16fed1fc 833 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 834 return Qnil;
35e623fb 835}
4ed46869 836
4ed46869 837
3263d5a2
KH
838/* Define a charset according to the arguments. The Nth argument is
839 the Nth attribute of the charset (the last attribute `charset-id'
840 is not included). See the docstring of `define-charset' for the
841 detail. */
4ed46869 842
3263d5a2
KH
843DEFUN ("define-charset-internal", Fdefine_charset_internal,
844 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
845 doc: /* For internal use only.
846usage: (define-charset-internal ...) */)
f66c7cf8 847 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 848{
3263d5a2
KH
849 /* Charset attr vector. */
850 Lisp_Object attrs;
851 Lisp_Object val;
0de4bb68 852 EMACS_UINT hash_code;
3263d5a2 853 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 854 int i, j;
3263d5a2
KH
855 struct charset charset;
856 int id;
857 int dimension;
858 int new_definition_p;
859 int nchars;
860
861 if (nargs != charset_arg_max)
862 return Fsignal (Qwrong_number_of_arguments,
863 Fcons (intern ("define-charset-internal"),
864 make_number (nargs)));
865
866 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
867
868 CHECK_SYMBOL (args[charset_arg_name]);
869 ASET (attrs, charset_name, args[charset_arg_name]);
870
871 val = args[charset_arg_code_space];
c032b5f8 872 for (i = 0, dimension = 0, nchars = 1; ; i++)
76d7b829 873 {
d311d28c 874 Lisp_Object min_byte_obj, max_byte_obj;
3263d5a2
KH
875 int min_byte, max_byte;
876
d311d28c
PE
877 min_byte_obj = Faref (val, make_number (i * 2));
878 max_byte_obj = Faref (val, make_number (i * 2 + 1));
879 CHECK_RANGED_INTEGER (0, min_byte_obj, 255);
880 min_byte = XINT (min_byte_obj);
881 CHECK_RANGED_INTEGER (min_byte, max_byte_obj, 255);
882 max_byte = XINT (max_byte_obj);
3263d5a2
KH
883 charset.code_space[i * 4] = min_byte;
884 charset.code_space[i * 4 + 1] = max_byte;
885 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
3263d5a2
KH
886 if (max_byte > 0)
887 dimension = i + 1;
c032b5f8
PE
888 if (i == 3)
889 break;
890 nchars *= charset.code_space[i * 4 + 2];
891 charset.code_space[i * 4 + 3] = nchars;
3263d5a2 892 }
4ed46869 893
3263d5a2
KH
894 val = args[charset_arg_dimension];
895 if (NILP (val))
896 charset.dimension = dimension;
897 else
4ed46869 898 {
d311d28c 899 CHECK_RANGED_INTEGER (1, val, 4);
3263d5a2 900 charset.dimension = XINT (val);
4ed46869
KH
901 }
902
3263d5a2
KH
903 charset.code_linear_p
904 = (charset.dimension == 1
905 || (charset.code_space[2] == 256
906 && (charset.dimension == 2
907 || (charset.code_space[6] == 256
908 && (charset.dimension == 3
909 || charset.code_space[10] == 256)))));
910
69f8de5b 911 if (! charset.code_linear_p)
4ed46869 912 {
69f8de5b 913 charset.code_space_mask = (unsigned char *) xmalloc (256);
72af86bd 914 memset (charset.code_space_mask, 0, 256);
69f8de5b
KH
915 for (i = 0; i < 4; i++)
916 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
917 j++)
918 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
919 }
920
3263d5a2 921 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 922
3263d5a2
KH
923 charset.min_code = (charset.code_space[0]
924 | (charset.code_space[4] << 8)
925 | (charset.code_space[8] << 16)
926 | (charset.code_space[12] << 24));
927 charset.max_code = (charset.code_space[1]
928 | (charset.code_space[5] << 8)
929 | (charset.code_space[9] << 16)
930 | (charset.code_space[13] << 24));
820ee249 931 charset.char_index_offset = 0;
8a73a704 932
820ee249
KH
933 val = args[charset_arg_min_code];
934 if (! NILP (val))
935 {
be44ca6c 936 unsigned code = cons_to_unsigned (val, UINT_MAX);
fdb82f93 937
820ee249
KH
938 if (code < charset.min_code
939 || code > charset.max_code)
940 args_out_of_range_3 (make_number (charset.min_code),
941 make_number (charset.max_code), val);
942 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
943 charset.min_code = code;
944 }
3fac5a51 945
820ee249
KH
946 val = args[charset_arg_max_code];
947 if (! NILP (val))
3fac5a51 948 {
be44ca6c 949 unsigned code = cons_to_unsigned (val, UINT_MAX);
820ee249 950
820ee249
KH
951 if (code < charset.min_code
952 || code > charset.max_code)
953 args_out_of_range_3 (make_number (charset.min_code),
954 make_number (charset.max_code), val);
955 charset.max_code = code;
3fac5a51 956 }
3fac5a51 957
ecca2aad 958 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 959
3263d5a2
KH
960 val = args[charset_arg_invalid_code];
961 if (NILP (val))
962 {
963 if (charset.min_code > 0)
964 charset.invalid_code = 0;
bbf12bb3
KH
965 else
966 {
d311d28c 967 if (charset.max_code < min (UINT_MAX, MOST_POSITIVE_FIXNUM))
3263d5a2
KH
968 charset.invalid_code = charset.max_code + 1;
969 else
970 error ("Attribute :invalid-code must be specified");
76d7b829 971 }
76d7b829 972 }
3263d5a2 973 else
d311d28c 974 charset.invalid_code = XFASTINT (val);
4ed46869 975
3263d5a2
KH
976 val = args[charset_arg_iso_final];
977 if (NILP (val))
978 charset.iso_final = -1;
979 else
980 {
981 CHECK_NUMBER (val);
982 if (XINT (val) < '0' || XINT (val) > 127)
c2982e87 983 error ("Invalid iso-final-char: %"pI"d", XINT (val));
3263d5a2
KH
984 charset.iso_final = XINT (val);
985 }
4ed46869 986
3263d5a2
KH
987 val = args[charset_arg_iso_revision];
988 if (NILP (val))
989 charset.iso_revision = -1;
990 else
4ed46869 991 {
d311d28c 992 CHECK_RANGED_INTEGER (-1, val, 63);
3263d5a2 993 charset.iso_revision = XINT (val);
4ed46869 994 }
4ed46869 995
3263d5a2
KH
996 val = args[charset_arg_emacs_mule_id];
997 if (NILP (val))
998 charset.emacs_mule_id = -1;
4ed46869
KH
999 else
1000 {
3263d5a2
KH
1001 CHECK_NATNUM (val);
1002 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
c2982e87 1003 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
3263d5a2 1004 charset.emacs_mule_id = XINT (val);
c83ef371 1005 }
f6302ac9 1006
3263d5a2 1007 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1008
3263d5a2 1009 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1010
3263d5a2
KH
1011 charset.unified_p = 0;
1012
72af86bd 1013 memset (charset.fast_map, 0, sizeof (charset.fast_map));
3263d5a2
KH
1014
1015 if (! NILP (args[charset_arg_code_offset]))
1016 {
1017 val = args[charset_arg_code_offset];
d311d28c 1018 CHECK_TYPE_RANGED_INTEGER (int, val);
3263d5a2
KH
1019
1020 charset.method = CHARSET_METHOD_OFFSET;
1021 charset.code_offset = XINT (val);
1022
1023 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1024 charset.min_char = i + charset.code_offset;
1025 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1026 charset.max_char = i + charset.code_offset;
1027 if (charset.max_char > MAX_CHAR)
1028 error ("Unsupported max char: %d", charset.max_char);
1029
f148205f
KH
1030 i = (charset.min_char >> 7) << 7;
1031 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1032 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1033 i = (i >> 12) << 12;
3263d5a2
KH
1034 for (; i <= charset.max_char; i += 0x1000)
1035 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1036 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1037 charset.ascii_compatible_p = 1;
3263d5a2
KH
1038 }
1039 else if (! NILP (args[charset_arg_map]))
1040 {
1041 val = args[charset_arg_map];
1042 ASET (attrs, charset_map, val);
ecca2aad 1043 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1044 }
374c5cfd 1045 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1046 {
374c5cfd
KH
1047 Lisp_Object parent;
1048 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1049 struct charset *parent_charset;
1050
1051 val = args[charset_arg_subset];
1052 parent = Fcar (val);
1053 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1054 parent_min_code = Fnth (make_number (1), val);
1055 CHECK_NATNUM (parent_min_code);
1056 parent_max_code = Fnth (make_number (2), val);
1057 CHECK_NATNUM (parent_max_code);
1058 parent_code_offset = Fnth (make_number (3), val);
1059 CHECK_NUMBER (parent_code_offset);
1060 val = Fmake_vector (make_number (4), Qnil);
1061 ASET (val, 0, make_number (parent_charset->id));
1062 ASET (val, 1, parent_min_code);
1063 ASET (val, 2, parent_max_code);
1064 ASET (val, 3, parent_code_offset);
1065 ASET (attrs, charset_subset, val);
1066
1067 charset.method = CHARSET_METHOD_SUBSET;
1068 /* Here, we just copy the parent's fast_map. It's not accurate,
1069 but at least it works for quickly detecting which character
1070 DOESN'T belong to this charset. */
1071 for (i = 0; i < 190; i++)
1072 charset.fast_map[i] = parent_charset->fast_map[i];
1073
1074 /* We also copy these for parents. */
1075 charset.min_char = parent_charset->min_char;
1076 charset.max_char = parent_charset->max_char;
1077 }
1078 else if (! NILP (args[charset_arg_superset]))
0282eb69 1079 {
374c5cfd
KH
1080 val = args[charset_arg_superset];
1081 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1082 val = Fcopy_sequence (val);
374c5cfd 1083 ASET (attrs, charset_superset, val);
3263d5a2
KH
1084
1085 charset.min_char = MAX_CHAR;
1086 charset.max_char = 0;
1087 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1088 {
3263d5a2
KH
1089 Lisp_Object elt, car_part, cdr_part;
1090 int this_id, offset;
1091 struct charset *this_charset;
2e344af3 1092
3263d5a2
KH
1093 elt = Fcar (val);
1094 if (CONSP (elt))
2e344af3 1095 {
3263d5a2
KH
1096 car_part = XCAR (elt);
1097 cdr_part = XCDR (elt);
1098 CHECK_CHARSET_GET_ID (car_part, this_id);
d311d28c 1099 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
3263d5a2 1100 offset = XINT (cdr_part);
177c0ea7 1101 }
3263d5a2 1102 else
4ed46869 1103 {
3263d5a2
KH
1104 CHECK_CHARSET_GET_ID (elt, this_id);
1105 offset = 0;
4ed46869 1106 }
3263d5a2
KH
1107 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1108
1109 this_charset = CHARSET_FROM_ID (this_id);
1110 if (charset.min_char > this_charset->min_char)
1111 charset.min_char = this_charset->min_char;
1112 if (charset.max_char < this_charset->max_char)
1113 charset.max_char = this_charset->max_char;
1114 for (i = 0; i < 190; i++)
1115 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1116 }
0282eb69 1117 }
2e344af3 1118 else
3263d5a2 1119 error ("None of :code-offset, :map, :parents are specified");
05505664 1120
3263d5a2
KH
1121 val = args[charset_arg_unify_map];
1122 if (! NILP (val) && !STRINGP (val))
1123 CHECK_VECTOR (val);
1124 ASET (attrs, charset_unify_map, val);
05505664 1125
3263d5a2
KH
1126 CHECK_LIST (args[charset_arg_plist]);
1127 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1128
3263d5a2
KH
1129 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1130 &hash_code);
1131 if (charset.hash_index >= 0)
1132 {
1133 new_definition_p = 0;
4f65af01 1134 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1135 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1136 }
1a45ff10 1137 else
3263d5a2
KH
1138 {
1139 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1140 hash_code);
1141 if (charset_table_used == charset_table_size)
1142 {
e097a6fa 1143 /* Ensure that charset IDs fit into 'int' as well as into the
0065d054
PE
1144 restriction imposed by fixnums. Although the 'int' restriction
1145 could be removed, too much other code would need altering; for
1146 example, the IDs are stuffed into struct
1147 coding_system.charbuf[i] entries, which are 'int'. */
1148 int old_size = charset_table_size;
1149 struct charset *new_table =
1150 xpalloc (0, &charset_table_size, 1,
1151 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1152 sizeof *charset_table);
1153 memcpy (new_table, charset_table, old_size * sizeof *new_table);
2fe1edd1 1154 charset_table = new_table;
0065d054
PE
1155 /* FIXME: Doesn't this leak memory? The old charset_table becomes
1156 unreachable. It could be that this is intentional, because the
1157 old charset table may be in a dumped emacs, and reallocating such
1158 a table may not work. If the memory leak is intentional, a
1159 comment should be added to explain this. If not, the old
1160 charset_table should be freed, by passing it as the 1st argument
1161 to xpalloc and removing the memcpy. */
3263d5a2
KH
1162 }
1163 id = charset_table_used++;
3263d5a2
KH
1164 new_definition_p = 1;
1165 }
2e344af3 1166
4f65af01 1167 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1168 charset.id = id;
1169 charset_table[id] = charset;
2e344af3 1170
ecca2aad 1171 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1172 {
1173 load_charset (&charset, 0);
1174 charset_table[id] = charset;
1175 }
ecca2aad 1176
3263d5a2 1177 if (charset.iso_final >= 0)
4ed46869 1178 {
3263d5a2
KH
1179 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1180 charset.iso_final) = id;
1181 if (new_definition_p)
1182 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1183 Fcons (make_number (id), Qnil));
7c7dceee
KH
1184 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1185 charset_jisx0201_roman = id;
1186 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1187 charset_jisx0208_1978 = id;
1188 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1189 charset_jisx0208 = id;
d32320c4
KH
1190 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1191 charset_ksc5601 = id;
4ed46869 1192 }
d0cf2d48 1193
3263d5a2 1194 if (charset.emacs_mule_id >= 0)
4ed46869 1195 {
b84ae584 1196 emacs_mule_charset[charset.emacs_mule_id] = id;
4f65af01
KH
1197 if (charset.emacs_mule_id < 0xA0)
1198 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1199 else
1200 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1201 if (new_definition_p)
1202 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1203 Fcons (make_number (id), Qnil));
4ed46869
KH
1204 }
1205
3263d5a2
KH
1206 if (new_definition_p)
1207 {
1208 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1209 if (charset.supplementary_p)
1210 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1211 Fcons (make_number (id), Qnil));
1212 else
880820fe
KH
1213 {
1214 Lisp_Object tail;
1215
1216 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1217 {
1218 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1219
1220 if (cs->supplementary_p)
1221 break;
1222 }
1223 if (EQ (tail, Vcharset_ordered_list))
1224 Vcharset_ordered_list = Fcons (make_number (id),
1225 Vcharset_ordered_list);
1226 else if (NILP (tail))
1227 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1228 Fcons (make_number (id), Qnil));
1229 else
1230 {
1231 val = Fcons (XCAR (tail), XCDR (tail));
1232 XSETCDR (tail, val);
1233 XSETCAR (tail, make_number (id));
1234 }
1235 }
dbbb237d 1236 charset_ordered_list_tick++;
3263d5a2 1237 }
4ed46869 1238
3263d5a2 1239 return Qnil;
4ed46869
KH
1240}
1241
2fe1edd1
KH
1242
1243/* Same as Fdefine_charset_internal but arguments are more convenient
1244 to call from C (typically in syms_of_charset). This can define a
1245 charset of `offset' method only. Return the ID of the new
1246 charset. */
1247
1248static int
dd4c5104
DN
1249define_charset_internal (Lisp_Object name,
1250 int dimension,
dfb6afda 1251 const char *code_space_chars,
dd4c5104
DN
1252 unsigned min_code, unsigned max_code,
1253 int iso_final, int iso_revision, int emacs_mule_id,
1254 int ascii_compatible, int supplementary,
1255 int code_offset)
2fe1edd1 1256{
dfb6afda 1257 const unsigned char *code_space = (const unsigned char *) code_space_chars;
2fe1edd1
KH
1258 Lisp_Object args[charset_arg_max];
1259 Lisp_Object plist[14];
1260 Lisp_Object val;
1261 int i;
1262
1263 args[charset_arg_name] = name;
1264 args[charset_arg_dimension] = make_number (dimension);
1265 val = Fmake_vector (make_number (8), make_number (0));
1266 for (i = 0; i < 8; i++)
1267 ASET (val, i, make_number (code_space[i]));
1268 args[charset_arg_code_space] = val;
1269 args[charset_arg_min_code] = make_number (min_code);
1270 args[charset_arg_max_code] = make_number (max_code);
1271 args[charset_arg_iso_final]
1272 = (iso_final < 0 ? Qnil : make_number (iso_final));
1273 args[charset_arg_iso_revision] = make_number (iso_revision);
1274 args[charset_arg_emacs_mule_id]
1275 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1276 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1277 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1278 args[charset_arg_invalid_code] = Qnil;
1279 args[charset_arg_code_offset] = make_number (code_offset);
1280 args[charset_arg_map] = Qnil;
1281 args[charset_arg_subset] = Qnil;
1282 args[charset_arg_superset] = Qnil;
1283 args[charset_arg_unify_map] = Qnil;
1284
d67b4f80 1285 plist[0] = intern_c_string (":name");
2fe1edd1 1286 plist[1] = args[charset_arg_name];
d67b4f80 1287 plist[2] = intern_c_string (":dimension");
2fe1edd1 1288 plist[3] = args[charset_arg_dimension];
d67b4f80 1289 plist[4] = intern_c_string (":code-space");
2fe1edd1 1290 plist[5] = args[charset_arg_code_space];
d67b4f80 1291 plist[6] = intern_c_string (":iso-final-char");
2fe1edd1 1292 plist[7] = args[charset_arg_iso_final];
d67b4f80 1293 plist[8] = intern_c_string (":emacs-mule-id");
2fe1edd1 1294 plist[9] = args[charset_arg_emacs_mule_id];
d67b4f80 1295 plist[10] = intern_c_string (":ascii-compatible-p");
2fe1edd1 1296 plist[11] = args[charset_arg_ascii_compatible_p];
d67b4f80 1297 plist[12] = intern_c_string (":code-offset");
2fe1edd1
KH
1298 plist[13] = args[charset_arg_code_offset];
1299
1300 args[charset_arg_plist] = Flist (14, plist);
1301 Fdefine_charset_internal (charset_arg_max, args);
1302
1303 return XINT (CHARSET_SYMBOL_ID (name));
1304}
1305
1306
3263d5a2
KH
1307DEFUN ("define-charset-alias", Fdefine_charset_alias,
1308 Sdefine_charset_alias, 2, 2, 0,
1309 doc: /* Define ALIAS as an alias for charset CHARSET. */)
5842a27b 1310 (Lisp_Object alias, Lisp_Object charset)
4ed46869 1311{
3263d5a2
KH
1312 Lisp_Object attr;
1313
1314 CHECK_CHARSET_GET_ATTR (charset, attr);
1315 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1316 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1317 return Qnil;
1318}
4ed46869 1319
4ed46869 1320
3263d5a2 1321DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1322 doc: /* Return the property list of CHARSET. */)
5842a27b 1323 (Lisp_Object charset)
3263d5a2
KH
1324{
1325 Lisp_Object attrs;
1326
1327 CHECK_CHARSET_GET_ATTR (charset, attrs);
1328 return CHARSET_ATTR_PLIST (attrs);
1329}
1330
1331
1332DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1333 doc: /* Set CHARSET's property list to PLIST. */)
5842a27b 1334 (Lisp_Object charset, Lisp_Object plist)
3263d5a2
KH
1335{
1336 Lisp_Object attrs;
1337
1338 CHECK_CHARSET_GET_ATTR (charset, attrs);
1339 CHARSET_ATTR_PLIST (attrs) = plist;
1340 return plist;
1341}
1342
1343
dbbb237d 1344DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1345 doc: /* Unify characters of CHARSET with Unicode.
1346This means reading the relevant file and installing the table defined
dbbb237d
KH
1347by CHARSET's `:unify-map' property.
1348
64165ae2
DL
1349Optional second arg UNIFY-MAP is a file name string or a vector. It has
1350the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1351`define-charset' (which see).
1352
1353Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
5842a27b 1354 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
8a73a704 1355{
3263d5a2
KH
1356 int id;
1357 struct charset *cs;
8f924df7 1358
3263d5a2
KH
1359 CHECK_CHARSET_GET_ID (charset, id);
1360 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1361 if (NILP (deunify)
1362 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1363 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1364 return Qnil;
dbbb237d 1365
3263d5a2 1366 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1367 if (NILP (deunify))
1368 {
ecca2aad
KH
1369 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1370 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1371 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1372 if (NILP (unify_map))
1373 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1374 else
ecca2aad
KH
1375 {
1376 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1377 signal_error ("Bad unify-map", unify_map);
1378 CHARSET_UNIFY_MAP (cs) = unify_map;
1379 }
1380 if (NILP (Vchar_unify_table))
1381 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1382 char_table_set_range (Vchar_unify_table,
1383 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1384 CHARSET_UNIFIED_P (cs) = 1;
1385 }
1386 else if (CHAR_TABLE_P (Vchar_unify_table))
1387 {
1388 int min_code = CHARSET_MIN_CODE (cs);
1389 int max_code = CHARSET_MAX_CODE (cs);
1390 int min_char = DECODE_CHAR (cs, min_code);
1391 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1392
dbbb237d
KH
1393 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1394 }
8f924df7 1395
3263d5a2 1396 return Qnil;
8a73a704
KH
1397}
1398
3fac5a51
KH
1399DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1400 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1401 doc: /*
d0cf2d48 1402Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1403DIMENSION is the number of bytes to represent a character: 1 or 2.
1404CHARS is the number of characters in a dimension: 94 or 96.
1405
1406This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1407If there's no unused final char for the specified kind of charset,
fdb82f93 1408return nil. */)
5842a27b 1409 (Lisp_Object dimension, Lisp_Object chars)
3fac5a51
KH
1410{
1411 int final_char;
1412
b7826503
PJ
1413 CHECK_NUMBER (dimension);
1414 CHECK_NUMBER (chars);
3263d5a2
KH
1415 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1416 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1417 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1418 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1419 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1420 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1421 break;
3fac5a51
KH
1422 return (final_char <= '?' ? make_number (final_char) : Qnil);
1423}
1424
3263d5a2 1425static void
971de7fb 1426check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1427{
3263d5a2
KH
1428 CHECK_NATNUM (dimension);
1429 CHECK_NATNUM (chars);
e6c3da20 1430 CHECK_CHARACTER (final_char);
4ed46869 1431
3263d5a2 1432 if (XINT (dimension) > 3)
c2982e87 1433 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
5fdb398c 1434 XINT (dimension));
4ed46869 1435 if (XINT (chars) != 94 && XINT (chars) != 96)
c2982e87 1436 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
3263d5a2 1437 if (XINT (final_char) < '0' || XINT (final_char) > '~')
e6c3da20
EZ
1438 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1439 (int)XINT (final_char));
3263d5a2
KH
1440}
1441
1442
1443DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1444 4, 4, 0,
cefd8c4f
KH
1445 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1446
1447On decoding by an ISO-2022 base coding system, when a charset
1448specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1449if CHARSET is designated instead. */)
5842a27b 1450 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
3263d5a2
KH
1451{
1452 int id;
82215ce9 1453 int chars_flag;
4ed46869 1454
3263d5a2
KH
1455 CHECK_CHARSET_GET_ID (charset, id);
1456 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1457 chars_flag = XINT (chars) == 96;
1458 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1459 return Qnil;
1460}
1461
3263d5a2 1462
2e344af3
KH
1463/* Return information about charsets in the text at PTR of NBYTES
1464 bytes, which are NCHARS characters. The value is:
f6302ac9 1465
cfe34140 1466 0: Each character is represented by one byte. This is always
3263d5a2
KH
1467 true for a unibyte string. For a multibyte string, true if
1468 it contains only ASCII characters.
1469
28c026cd
DL
1470 1: No charsets other than ascii, control-1, and latin-1 are
1471 found.
1d67c29b 1472
3263d5a2
KH
1473 2: Otherwise.
1474*/
4ed46869
KH
1475
1476int
971de7fb 1477string_xstring_p (Lisp_Object string)
4ed46869 1478{
8f924df7
KH
1479 const unsigned char *p = SDATA (string);
1480 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1481
8f924df7 1482 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1483 return 0;
1484
3263d5a2 1485 while (p < endp)
0282eb69 1486 {
3263d5a2 1487 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1488
3cc67a4d 1489 if (c >= 0x100)
3263d5a2 1490 return 2;
0282eb69 1491 }
3263d5a2
KH
1492 return 1;
1493}
05505664 1494
05505664 1495
3263d5a2 1496/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1497
3cc67a4d
KH
1498 CHARSETS is a vector. If Nth element is non-nil, it means the
1499 charset whose id is N is already found.
2e344af3 1500
3263d5a2 1501 It may lookup a translation table TABLE if supplied. */
2e344af3 1502
3263d5a2 1503static void
d311d28c 1504find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars, ptrdiff_t nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
3263d5a2 1505{
dbbb237d 1506 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1507
1508 if (nchars == nbytes)
3263d5a2 1509 {
3cc67a4d
KH
1510 if (multibyte)
1511 ASET (charsets, charset_ascii, Qt);
1512 else
1513 while (ptr < pend)
1514 {
1515 int c = *ptr++;
1516
1517 if (!NILP (table))
1518 c = translate_char (table, c);
1519 if (ASCII_BYTE_P (c))
1520 ASET (charsets, charset_ascii, Qt);
1521 else
1522 ASET (charsets, charset_eight_bit, Qt);
1523 }
1524 }
1525 else
1526 {
1527 while (ptr < pend)
3263d5a2 1528 {
3cc67a4d
KH
1529 int c = STRING_CHAR_ADVANCE (ptr);
1530 struct charset *charset;
3263d5a2 1531
3cc67a4d
KH
1532 if (!NILP (table))
1533 c = translate_char (table, c);
1534 charset = CHAR_CHARSET (c);
1535 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1536 }
4ed46869 1537 }
4ed46869
KH
1538}
1539
1540DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1541 2, 3, 0,
fdb82f93
PJ
1542 doc: /* Return a list of charsets in the region between BEG and END.
1543BEG and END are buffer positions.
1544Optional arg TABLE if non-nil is a translation table to look up.
1545
fdb82f93
PJ
1546If the current buffer is unibyte, the returned list may contain
1547only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1548 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
4ed46869 1549{
3263d5a2 1550 Lisp_Object charsets;
d311d28c 1551 ptrdiff_t from, from_byte, to, stop, stop_byte;
42ca828e 1552 int i;
4ed46869 1553 Lisp_Object val;
4b4deea2 1554 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
4ed46869
KH
1555
1556 validate_region (&beg, &end);
1557 from = XFASTINT (beg);
1558 stop = to = XFASTINT (end);
6ae1f27e 1559
4ed46869 1560 if (from < GPT && GPT < to)
6ae1f27e
RS
1561 {
1562 stop = GPT;
1563 stop_byte = GPT_BYTE;
1564 }
1565 else
1566 stop_byte = CHAR_TO_BYTE (stop);
1567
1568 from_byte = CHAR_TO_BYTE (from);
1569
3263d5a2 1570 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1571 while (1)
1572 {
3263d5a2 1573 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1574 stop_byte - from_byte, charsets, table,
1575 multibyte);
4ed46869 1576 if (stop < to)
6ae1f27e
RS
1577 {
1578 from = stop, from_byte = stop_byte;
1579 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1580 }
4ed46869
KH
1581 else
1582 break;
1583 }
6ae1f27e 1584
4ed46869 1585 val = Qnil;
3263d5a2 1586 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1587 if (!NILP (AREF (charsets, i)))
3263d5a2 1588 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1589 return val;
1590}
1591
1592DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1593 1, 2, 0,
fdb82f93
PJ
1594 doc: /* Return a list of charsets in STR.
1595Optional arg TABLE if non-nil is a translation table to look up.
1596
fdb82f93 1597If STR is unibyte, the returned list may contain
3263d5a2 1598only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1599 (Lisp_Object str, Lisp_Object table)
4ed46869 1600{
3263d5a2 1601 Lisp_Object charsets;
4ed46869
KH
1602 int i;
1603 Lisp_Object val;
1604
b7826503 1605 CHECK_STRING (str);
87b089ad 1606
3263d5a2 1607 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1608 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1609 charsets, table,
1610 STRING_MULTIBYTE (str));
4ed46869 1611 val = Qnil;
3263d5a2 1612 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1613 if (!NILP (AREF (charsets, i)))
3263d5a2 1614 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1615 return val;
1616}
2e344af3 1617
4ed46869 1618\f
3263d5a2 1619
ecca2aad
KH
1620/* Return a unified character code for C (>= 0x110000). VAL is a
1621 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1622 charset symbol. */
1623int
971de7fb 1624maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1625{
1626 struct charset *charset;
1627
1628 if (INTEGERP (val))
ccd9a01a 1629 return XFASTINT (val);
ecca2aad
KH
1630 if (NILP (val))
1631 return c;
1632
1633 CHECK_CHARSET_GET_CHARSET (val, charset);
1634 load_charset (charset, 1);
1635 if (! inhibit_load_charset_map)
1636 {
1637 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1638 if (! NILP (val))
ccd9a01a 1639 c = XFASTINT (val);
ecca2aad
KH
1640 }
1641 else
1642 {
1643 int code_index = c - CHARSET_CODE_OFFSET (charset);
1644 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1645
1646 if (unified > 0)
1647 c = unified;
1648 }
1649 return c;
1650}
1651
1652
3263d5a2
KH
1653/* Return a character correponding to the code-point CODE of
1654 CHARSET. */
1655
1656int
971de7fb 1657decode_char (struct charset *charset, unsigned int code)
4ed46869 1658{
3263d5a2
KH
1659 int c, char_index;
1660 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1661
3263d5a2
KH
1662 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1663 return -1;
4ed46869 1664
374c5cfd 1665 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1666 {
374c5cfd
KH
1667 Lisp_Object subset_info;
1668
1669 subset_info = CHARSET_SUBSET (charset);
1670 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1671 code -= XINT (AREF (subset_info, 3));
1672 if (code < XFASTINT (AREF (subset_info, 1))
1673 || code > XFASTINT (AREF (subset_info, 2)))
1674 c = -1;
1675 else
1676 c = DECODE_CHAR (charset, code);
2e344af3 1677 }
374c5cfd 1678 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1679 {
3263d5a2 1680 Lisp_Object parents;
4ed46869 1681
374c5cfd 1682 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1683 c = -1;
1684 for (; CONSP (parents); parents = XCDR (parents))
1685 {
1686 int id = XINT (XCAR (XCAR (parents)));
1687 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1688 unsigned this_code = code - code_offset;
4ed46869 1689
3263d5a2
KH
1690 charset = CHARSET_FROM_ID (id);
1691 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1692 break;
1693 }
1694 }
1695 else
ac4137cc 1696 {
3263d5a2 1697 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1698 if (char_index < 0)
1699 return -1;
4ed46869 1700
3263d5a2 1701 if (method == CHARSET_METHOD_MAP)
ac4137cc 1702 {
3263d5a2 1703 Lisp_Object decoder;
4ed46869 1704
3263d5a2
KH
1705 decoder = CHARSET_DECODER (charset);
1706 if (! VECTORP (decoder))
ecca2aad
KH
1707 {
1708 load_charset (charset, 1);
1709 decoder = CHARSET_DECODER (charset);
1710 }
1711 if (VECTORP (decoder))
1712 c = XINT (AREF (decoder, char_index));
1713 else
1714 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1715 }
ecca2aad 1716 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1717 {
3263d5a2 1718 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1719 if (CHARSET_UNIFIED_P (charset)
1720 && c > MAX_UNICODE_CHAR)
1721 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1722 }
1723 }
4ed46869 1724
3263d5a2 1725 return c;
90d7b74e
KH
1726}
1727
374c5cfd
KH
1728/* Variable used temporarily by the macro ENCODE_CHAR. */
1729Lisp_Object charset_work;
4ed46869 1730
3263d5a2 1731/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1732 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1733 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1734
3263d5a2 1735unsigned
971de7fb 1736encode_char (struct charset *charset, int c)
9d3d8cba 1737{
3263d5a2
KH
1738 unsigned code;
1739 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1740
3263d5a2 1741 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1742 {
6809ca75 1743 Lisp_Object deunifier;
ecca2aad 1744 int code_index = -1;
4ed46869 1745
3263d5a2
KH
1746 deunifier = CHARSET_DEUNIFIER (charset);
1747 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1748 {
ecca2aad 1749 load_charset (charset, 2);
3263d5a2 1750 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1751 }
ecca2aad
KH
1752 if (CHAR_TABLE_P (deunifier))
1753 {
1754 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1755
1756 if (INTEGERP (deunified))
1757 code_index = XINT (deunified);
1758 }
1759 else
1760 {
1761 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1762 }
1763 if (code_index >= 0)
1764 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1765 }
9d3d8cba 1766
374c5cfd
KH
1767 if (method == CHARSET_METHOD_SUBSET)
1768 {
1769 Lisp_Object subset_info;
1770 struct charset *this_charset;
1771
1772 subset_info = CHARSET_SUBSET (charset);
1773 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1774 code = ENCODE_CHAR (this_charset, c);
1775 if (code == CHARSET_INVALID_CODE (this_charset)
1776 || code < XFASTINT (AREF (subset_info, 1))
1777 || code > XFASTINT (AREF (subset_info, 2)))
1778 return CHARSET_INVALID_CODE (charset);
1779 code += XINT (AREF (subset_info, 3));
1780 return code;
1781 }
9d3d8cba 1782
374c5cfd 1783 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1784 {
3263d5a2 1785 Lisp_Object parents;
d2665018 1786
374c5cfd 1787 parents = CHARSET_SUPERSET (charset);
3263d5a2 1788 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1789 {
3263d5a2
KH
1790 int id = XINT (XCAR (XCAR (parents)));
1791 int code_offset = XINT (XCDR (XCAR (parents)));
1792 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1793
3263d5a2 1794 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1795 if (code != CHARSET_INVALID_CODE (this_charset))
1796 return code + code_offset;
beeedaad 1797 }
3263d5a2
KH
1798 return CHARSET_INVALID_CODE (charset);
1799 }
1bcc1567 1800
15c85a88
KH
1801 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1802 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1803 return CHARSET_INVALID_CODE (charset);
1bcc1567 1804
3263d5a2 1805 if (method == CHARSET_METHOD_MAP)
3f62427c 1806 {
3263d5a2 1807 Lisp_Object encoder;
beeedaad 1808 Lisp_Object val;
9b6a601f 1809
3263d5a2
KH
1810 encoder = CHARSET_ENCODER (charset);
1811 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1812 {
1813 load_charset (charset, 2);
1814 encoder = CHARSET_ENCODER (charset);
1815 }
1816 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1817 {
1818 val = CHAR_TABLE_REF (encoder, c);
1819 if (NILP (val))
1820 return CHARSET_INVALID_CODE (charset);
1821 code = XINT (val);
1822 if (! CHARSET_COMPACT_CODES_P (charset))
1823 code = INDEX_TO_CODE_POINT (charset, code);
1824 }
1825 else
1826 {
1827 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
51b59d79 1828 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad 1829 }
3263d5a2 1830 }
820ee249 1831 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1832 {
ecca2aad
KH
1833 int code_index = c - CHARSET_CODE_OFFSET (charset);
1834
1835 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1836 }
8ac5a9cc 1837
3263d5a2 1838 return code;
4ed46869
KH
1839}
1840
4ed46869 1841
3263d5a2
KH
1842DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1843 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1844Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1845
3263d5a2 1846CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1847
3263d5a2 1848Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1849and CODE-POINT to a character. Currently not supported and just ignored. */)
5842a27b 1850 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
4ed46869 1851{
3263d5a2
KH
1852 int c, id;
1853 unsigned code;
1854 struct charset *charsetp;
859f2b3c 1855
3263d5a2 1856 CHECK_CHARSET_GET_ID (charset, id);
be44ca6c 1857 code = cons_to_unsigned (code_point, UINT_MAX);
3263d5a2
KH
1858 charsetp = CHARSET_FROM_ID (id);
1859 c = DECODE_CHAR (charsetp, code);
1860 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1861}
1862
859f2b3c 1863
3263d5a2
KH
1864DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1865 doc: /* Encode the character CH into a code-point of CHARSET.
1866Return nil if CHARSET doesn't include CH.
17e7ef1b 1867
d0cf2d48 1868Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2 1869code-point in CCS. Currently not supported and just ignored. */)
5842a27b 1870 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
4ed46869 1871{
34206dd2 1872 int c, id;
3263d5a2
KH
1873 unsigned code;
1874 struct charset *charsetp;
046b1f03 1875
3263d5a2 1876 CHECK_CHARSET_GET_ID (charset, id);
34206dd2
PE
1877 CHECK_CHARACTER (ch);
1878 c = XFASTINT (ch);
3263d5a2 1879 charsetp = CHARSET_FROM_ID (id);
34206dd2 1880 code = ENCODE_CHAR (charsetp, c);
3263d5a2
KH
1881 if (code == CHARSET_INVALID_CODE (charsetp))
1882 return Qnil;
be44ca6c 1883 return INTEGER_TO_CONS (code);
beeedaad
KH
1884}
1885
beeedaad 1886
b121a744
KH
1887DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1888 doc:
1889 /* Return a character of CHARSET whose position codes are CODEn.
1890
1891CODE1 through CODE4 are optional, but if you don't supply sufficient
1892position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1893is specified. */)
5842a27b 1894 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
beeedaad 1895{
3263d5a2
KH
1896 int id, dimension;
1897 struct charset *charsetp;
b121a744
KH
1898 unsigned code;
1899 int c;
87b089ad 1900
3263d5a2
KH
1901 CHECK_CHARSET_GET_ID (charset, id);
1902 charsetp = CHARSET_FROM_ID (id);
4ed46869 1903
b121a744
KH
1904 dimension = CHARSET_DIMENSION (charsetp);
1905 if (NILP (code1))
d47073ca
KH
1906 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1907 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1908 else
859f2b3c 1909 {
b121a744
KH
1910 CHECK_NATNUM (code1);
1911 if (XFASTINT (code1) >= 0x100)
1912 args_out_of_range (make_number (0xFF), code1);
1913 code = XFASTINT (code1);
859f2b3c 1914
b0a1e45e 1915 if (dimension > 1)
beeedaad 1916 {
b121a744 1917 code <<= 8;
b0a1e45e
KH
1918 if (NILP (code2))
1919 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1920 else
b121a744 1921 {
b0a1e45e
KH
1922 CHECK_NATNUM (code2);
1923 if (XFASTINT (code2) >= 0x100)
1924 args_out_of_range (make_number (0xFF), code2);
1925 code |= XFASTINT (code2);
b121a744 1926 }
99529c2c 1927
b0a1e45e 1928 if (dimension > 2)
b121a744
KH
1929 {
1930 code <<= 8;
b0a1e45e
KH
1931 if (NILP (code3))
1932 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1933 else
1934 {
b0a1e45e
KH
1935 CHECK_NATNUM (code3);
1936 if (XFASTINT (code3) >= 0x100)
1937 args_out_of_range (make_number (0xFF), code3);
1938 code |= XFASTINT (code3);
1939 }
1940
1941 if (dimension > 3)
1942 {
1943 code <<= 8;
1944 if (NILP (code4))
1945 code |= charsetp->code_space[0];
1946 else
1947 {
1948 CHECK_NATNUM (code4);
1949 if (XFASTINT (code4) >= 0x100)
1950 args_out_of_range (make_number (0xFF), code4);
1951 code |= XFASTINT (code4);
1952 }
b121a744
KH
1953 }
1954 }
beeedaad 1955 }
859f2b3c 1956 }
beeedaad 1957
b121a744
KH
1958 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1959 code &= 0x7F7F7F7F;
1960 c = DECODE_CHAR (charsetp, code);
1961 if (c < 0)
1962 error ("Invalid code(s)");
3263d5a2 1963 return make_number (c);
4ed46869
KH
1964}
1965
beeedaad 1966
3263d5a2
KH
1967/* Return the first charset in CHARSET_LIST that contains C.
1968 CHARSET_LIST is a list of charset IDs. If it is nil, use
1969 Vcharset_ordered_list. */
beeedaad 1970
3263d5a2 1971struct charset *
971de7fb 1972char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 1973{
8a1816bb
KH
1974 int maybe_null = 0;
1975
3263d5a2
KH
1976 if (NILP (charset_list))
1977 charset_list = Vcharset_ordered_list;
8a1816bb
KH
1978 else
1979 maybe_null = 1;
beeedaad 1980
6c652beb 1981 while (CONSP (charset_list))
2e344af3 1982 {
3263d5a2
KH
1983 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1984 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 1985
3263d5a2 1986 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 1987 {
3263d5a2
KH
1988 if (code_return)
1989 *code_return = code;
1990 return charset;
3f62427c 1991 }
3263d5a2 1992 charset_list = XCDR (charset_list);
c0be27fd
KH
1993 if (! maybe_null
1994 && c <= MAX_UNICODE_CHAR
1995 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 1996 return CHARSET_FROM_ID (charset_unicode);
3f62427c 1997 }
8a1816bb
KH
1998 return (maybe_null ? NULL
1999 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2000 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2001}
2002
2e344af3 2003
3263d5a2 2004DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2005 doc:
d0cf2d48 2006 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2007The charset is decided by the current priority order of charsets.
2008A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2009CH in the charset. */)
5842a27b 2010 (Lisp_Object ch)
4ed46869 2011{
3263d5a2
KH
2012 struct charset *charset;
2013 int c, dimension;
2014 unsigned code;
4ed46869
KH
2015 Lisp_Object val;
2016
3263d5a2
KH
2017 CHECK_CHARACTER (ch);
2018 c = XFASTINT (ch);
2019 charset = CHAR_CHARSET (c);
2020 if (! charset)
3cc67a4d 2021 abort ();
3263d5a2
KH
2022 code = ENCODE_CHAR (charset, c);
2023 if (code == CHARSET_INVALID_CODE (charset))
2024 abort ();
2025 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2026 for (val = Qnil; dimension > 0; dimension--)
2027 {
2028 val = Fcons (make_number (code & 0xFF), val);
2029 code >>= 8;
2030 }
3263d5a2 2031 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2032}
2033
740f080d 2034
4cb75c4b
KH
2035DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2036 doc: /* Return the charset of highest priority that contains CH.
2037If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2038from which to find the charset. It may also be a coding system. In
2039that case, find the charset from what supported by that coding system. */)
5842a27b 2040 (Lisp_Object ch, Lisp_Object restriction)
4ed46869 2041{
3263d5a2 2042 struct charset *charset;
4ed46869 2043
3263d5a2 2044 CHECK_CHARACTER (ch);
4cb75c4b
KH
2045 if (NILP (restriction))
2046 charset = CHAR_CHARSET (XINT (ch));
2047 else
2048 {
4cb75c4b
KH
2049 if (CONSP (restriction))
2050 {
7a84eee5
KH
2051 int c = XFASTINT (ch);
2052
2053 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2054 {
f6095868 2055 struct charset *rcharset;
4cb75c4b 2056
f6095868
PE
2057 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2058 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
7a84eee5 2059 return XCAR (restriction);
4cb75c4b 2060 }
7a84eee5 2061 return Qnil;
4cb75c4b 2062 }
7a84eee5
KH
2063 restriction = coding_system_charset_list (restriction);
2064 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2065 if (! charset)
2066 return Qnil;
2067 }
3263d5a2 2068 return (CHARSET_NAME (charset));
4ed46869
KH
2069}
2070
17e7ef1b 2071
3263d5a2
KH
2072DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2073 doc: /*
2074Return charset of a character in the current buffer at position POS.
2075If POS is nil, it defauls to the current point.
2076If POS is out of range, the value is nil. */)
5842a27b 2077 (Lisp_Object pos)
2e344af3 2078{
3263d5a2
KH
2079 Lisp_Object ch;
2080 struct charset *charset;
046b1f03 2081
3263d5a2
KH
2082 ch = Fchar_after (pos);
2083 if (! INTEGERP (ch))
2084 return ch;
2085 charset = CHAR_CHARSET (XINT (ch));
2086 return (CHARSET_NAME (charset));
6ae1f27e 2087}
9036eb45 2088
87b089ad 2089
3263d5a2
KH
2090DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2091 doc: /*
2092Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2093
2094ISO 2022's designation sequence (escape sequence) distinguishes charsets
2095by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2096whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2097See the documentation of the function `charset-info' for the meanings of
2098DIMENSION, CHARS, and FINAL-CHAR. */)
5842a27b 2099 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
6ae1f27e 2100{
3263d5a2 2101 int id;
82215ce9 2102 int chars_flag;
a8a35e61 2103
3263d5a2 2104 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2105 chars_flag = XFASTINT (chars) == 96;
2106 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2107 XFASTINT (final_char));
2108 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2109}
2110
87b089ad 2111
3263d5a2
KH
2112DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2113 0, 0, 0,
2114 doc: /*
ecca2aad
KH
2115Internal use only.
2116Clear temporary charset mapping tables.
2117It should be called only from temacs invoked for dumping. */)
5842a27b 2118 (void)
87b089ad 2119{
ecca2aad 2120 if (temp_charset_work)
87b089ad 2121 {
baad03f0 2122 xfree (temp_charset_work);
ecca2aad 2123 temp_charset_work = NULL;
2e344af3 2124 }
2e344af3 2125
ecca2aad
KH
2126 if (CHAR_TABLE_P (Vchar_unify_table))
2127 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2128
3263d5a2 2129 return Qnil;
740f080d
KH
2130}
2131
8ddf5e57
DL
2132DEFUN ("charset-priority-list", Fcharset_priority_list,
2133 Scharset_priority_list, 0, 1, 0,
2134 doc: /* Return the list of charsets ordered by priority.
2135HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 2136 (Lisp_Object highestp)
2e344af3 2137{
8ddf5e57 2138 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2139
8ddf5e57 2140 if (!NILP (highestp))
16fed1fc 2141 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2142
8ddf5e57 2143 while (!NILP (list))
2e344af3 2144 {
16fed1fc 2145 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2146 list = XCDR (list);
2e344af3 2147 }
8ddf5e57 2148 return Fnreverse (val);
2e344af3
KH
2149}
2150
8ddf5e57
DL
2151DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2152 1, MANY, 0,
2153 doc: /* Assign higher priority to the charsets given as arguments.
2154usage: (set-charset-priority &rest charsets) */)
f66c7cf8 2155 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 2156{
af7c60ca 2157 Lisp_Object new_head, old_list, arglist[2];
321c819c 2158 Lisp_Object list_2022, list_emacs_mule;
f66c7cf8 2159 ptrdiff_t i;
c5101a77 2160 int id;
4ed46869 2161
8ddf5e57 2162 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2163 new_head = Qnil;
8ddf5e57 2164 for (i = 0; i < nargs; i++)
4ed46869 2165 {
8ddf5e57 2166 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2167 if (! NILP (Fmemq (make_number (id), old_list)))
2168 {
2169 old_list = Fdelq (make_number (id), old_list);
2170 new_head = Fcons (make_number (id), new_head);
2171 }
5729c92f 2172 }
8ddf5e57 2173 arglist[0] = Fnreverse (new_head);
6a9c90ec 2174 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2175 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2176 charset_ordered_list_tick++;
5729c92f 2177
6809ca75 2178 charset_unibyte = -1;
321c819c 2179 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2180 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2181 {
e77415b0 2182 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2183 list_2022 = Fcons (XCAR (old_list), list_2022);
2184 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2185 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2186 if (charset_unibyte < 0)
2187 {
2188 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2189
2190 if (CHARSET_DIMENSION (charset) == 1
2191 && CHARSET_ASCII_COMPATIBLE_P (charset)
2192 && CHARSET_MAX_CHAR (charset) >= 0x80)
2193 charset_unibyte = CHARSET_ID (charset);
2194 }
4ed46869 2195 }
321c819c
KH
2196 Viso_2022_charset_list = Fnreverse (list_2022);
2197 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2198 if (charset_unibyte < 0)
2199 charset_unibyte = charset_iso_8859_1;
4ed46869 2200
8ddf5e57 2201 return Qnil;
4ed46869
KH
2202}
2203
d5b33309
KH
2204DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2205 0, 1, 0,
2206 doc: /* Internal use only.
2207Return charset identification number of CHARSET. */)
5842a27b 2208 (Lisp_Object charset)
4ed46869 2209{
d5b33309 2210 int id;
4ed46869 2211
d5b33309
KH
2212 CHECK_CHARSET_GET_ID (charset, id);
2213 return make_number (id);
4ed46869
KH
2214}
2215
7a84eee5
KH
2216struct charset_sort_data
2217{
2218 Lisp_Object charset;
2219 int id;
0065d054 2220 ptrdiff_t priority;
7a84eee5
KH
2221};
2222
2223static int
2224charset_compare (const void *d1, const void *d2)
2225{
2226 const struct charset_sort_data *data1 = d1, *data2 = d2;
0065d054
PE
2227 if (data1->priority != data2->priority)
2228 return data1->priority < data2->priority ? -1 : 1;
2229 return 0;
7a84eee5
KH
2230}
2231
2232DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2233 doc: /* Sort charset list CHARSETS by a priority of each charset.
2234Return the sorted list. CHARSETS is modified by side effects.
2235See also `charset-priority-list' and `set-charset-priority'. */)
2236 (Lisp_Object charsets)
2237{
2238 Lisp_Object len = Flength (charsets);
0065d054
PE
2239 ptrdiff_t n = XFASTINT (len), i, j;
2240 int done;
7a84eee5
KH
2241 Lisp_Object tail, elt, attrs;
2242 struct charset_sort_data *sort_data;
726929c4 2243 int id, min_id = INT_MAX, max_id = INT_MIN;
7a84eee5
KH
2244 USE_SAFE_ALLOCA;
2245
2246 if (n == 0)
2247 return Qnil;
0065d054 2248 SAFE_NALLOCA (sort_data, 1, n);
7a84eee5
KH
2249 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2250 {
2251 elt = XCAR (tail);
2252 CHECK_CHARSET_GET_ATTR (elt, attrs);
2253 sort_data[i].charset = elt;
2254 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
726929c4 2255 if (id < min_id)
7a84eee5 2256 min_id = id;
726929c4 2257 if (id > max_id)
7a84eee5
KH
2258 max_id = id;
2259 }
2260 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2261 done < n && CONSP (tail); tail = XCDR (tail), i++)
2262 {
2263 elt = XCAR (tail);
2264 id = XFASTINT (elt);
2265 if (id >= min_id && id <= max_id)
2266 for (j = 0; j < n; j++)
2267 if (sort_data[j].id == id)
2268 {
2269 sort_data[j].priority = i;
2270 done++;
2271 }
2272 }
2273 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2274 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2275 XSETCAR (tail, sort_data[i].charset);
2276 SAFE_FREE ();
2277 return charsets;
2278}
2279
4ed46869 2280\f
3263d5a2 2281void
971de7fb 2282init_charset (void)
4ed46869 2283{
c8f94403
GM
2284 Lisp_Object tempdir;
2285 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
51b59d79 2286 if (access (SSDATA (tempdir), 0) < 0)
c8f94403
GM
2287 {
2288 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2289Emacs will not function correctly without the character map files.\n\
2290Please check your installation!\n",
2291 tempdir);
2292 /* TODO should this be a fatal error? (Bug#909) */
2293 }
2294
2295 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2296}
2297
4ed46869 2298
dfcf069d 2299void
971de7fb 2300init_charset_once (void)
4ed46869
KH
2301{
2302 int i, j, k;
2303
3263d5a2
KH
2304 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2305 for (j = 0; j < ISO_MAX_CHARS; j++)
2306 for (k = 0; k < ISO_MAX_FINAL; k++)
2307 iso_charset_table[i][j][k] = -1;
4ed46869 2308
60383934 2309 for (i = 0; i < 256; i++)
b84ae584 2310 emacs_mule_charset[i] = -1;
4ed46869 2311
7c7dceee
KH
2312 charset_jisx0201_roman = -1;
2313 charset_jisx0208_1978 = -1;
2314 charset_jisx0208 = -1;
d32320c4 2315 charset_ksc5601 = -1;
4ed46869
KH
2316}
2317
2318#ifdef emacs
2319
dfcf069d 2320void
971de7fb 2321syms_of_charset (void)
4ed46869 2322{
0065d054
PE
2323 /* Allocate an initial charset table that is just under 64 KiB in size.
2324 This should be large enough so that the charset table need not be
2325 reallocated during an initial bootstrap. Allocating anything larger than
2326 64 KiB in an initial run may not work, because glibc malloc might use
2327 mmap for larger allocations, and these don't work well across dumped
2328 systems. */
2329 enum {
2330 initial_malloc_max = (1 << 16) - 1,
2331 charset_table_size_init = initial_malloc_max / sizeof (struct charset)
2332 };
2333
3263d5a2
KH
2334 DEFSYM (Qcharsetp, "charsetp");
2335
2336 DEFSYM (Qascii, "ascii");
2337 DEFSYM (Qunicode, "unicode");
6c652beb 2338 DEFSYM (Qemacs, "emacs");
2fe1edd1 2339 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2340 DEFSYM (Qiso_8859_1, "iso-8859-1");
2341
2342 DEFSYM (Qgl, "gl");
2343 DEFSYM (Qgr, "gr");
2344
3263d5a2
KH
2345 staticpro (&Vcharset_ordered_list);
2346 Vcharset_ordered_list = Qnil;
2347
2348 staticpro (&Viso_2022_charset_list);
2349 Viso_2022_charset_list = Qnil;
2350
2351 staticpro (&Vemacs_mule_charset_list);
2352 Vemacs_mule_charset_list = Qnil;
2353
3943ed76 2354 /* Don't staticpro them here. It's done in syms_of_fns. */
088dcc3e
DN
2355 QCtest = intern_c_string (":test");
2356 Qeq = intern_c_string ("eq");
3943ed76 2357
3263d5a2 2358 staticpro (&Vcharset_hash_table);
8f924df7
KH
2359 {
2360 Lisp_Object args[2];
2361 args[0] = QCtest;
2362 args[1] = Qeq;
2363 Vcharset_hash_table = Fmake_hash_table (2, args);
2364 }
3263d5a2 2365
0065d054
PE
2366 charset_table = (struct charset *) xmalloc (sizeof (struct charset)
2367 * charset_table_size_init);
2368 charset_table_size = charset_table_size_init;
3263d5a2
KH
2369 charset_table_used = 0;
2370
3263d5a2
KH
2371 defsubr (&Scharsetp);
2372 defsubr (&Smap_charset_chars);
2373 defsubr (&Sdefine_charset_internal);
2374 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2375 defsubr (&Scharset_plist);
2376 defsubr (&Sset_charset_plist);
2377 defsubr (&Sunify_charset);
3fac5a51 2378 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2379 defsubr (&Sdeclare_equiv_charset);
2380 defsubr (&Sfind_charset_region);
2381 defsubr (&Sfind_charset_string);
3263d5a2
KH
2382 defsubr (&Sdecode_char);
2383 defsubr (&Sencode_char);
4ed46869 2384 defsubr (&Ssplit_char);
3263d5a2 2385 defsubr (&Smake_char);
4ed46869 2386 defsubr (&Schar_charset);
90d7b74e 2387 defsubr (&Scharset_after);
4ed46869 2388 defsubr (&Siso_charset);
3263d5a2 2389 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2390 defsubr (&Scharset_priority_list);
2391 defsubr (&Sset_charset_priority);
d5b33309 2392 defsubr (&Scharset_id_internal);
7a84eee5 2393 defsubr (&Ssort_charsets);
3263d5a2 2394
29208e82 2395 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
6f3122a7 2396 doc: /* *List of directories to search for charset map files. */);
4beef065 2397 Vcharset_map_path = Qnil;
4ed46869 2398
29208e82 2399 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
ecca2aad
KH
2400 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2401 inhibit_load_charset_map = 0;
2402
29208e82 2403 DEFVAR_LISP ("charset-list", Vcharset_list,
528623a0 2404 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2405 Vcharset_list = Qnil;
2406
29208e82 2407 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
6a9c90ec
KH
2408 doc: /* ISO639 language mnemonic symbol for the current language environment.
2409If the current language environment is for multiple languages (e.g. "Latin-1"),
2410the value may be a list of mnemonics. */);
2411 Vcurrent_iso639_language = Qnil;
2412
2fe1edd1
KH
2413 charset_ascii
2414 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2415 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2416 charset_iso_8859_1
2417 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2418 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2419 charset_unicode
73fbf2d9 2420 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2421 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2422 charset_emacs
2423 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2424 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2425 charset_eight_bit
2426 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2427 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2428 MAX_5_BYTE_CHAR + 1);
6809ca75 2429 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2430}
2431
2432#endif /* emacs */