use dynwind_begin and dynwind_end
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
4ed46869 2
ba318903 3Copyright (C) 2001-2014 Free Software Foundation, Inc.
1f41ee56
GM
4
5Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
9
10Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
4ed46869 13
369314dc
KH
14This file is part of GNU Emacs.
15
9ec0b715 16GNU Emacs is free software: you can redistribute it and/or modify
369314dc 17it under the terms of the GNU General Public License as published by
9ec0b715
GM
18the Free Software Foundation, either version 3 of the License, or
19(at your option) any later version.
4ed46869 20
369314dc
KH
21GNU Emacs is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24GNU General Public License for more details.
4ed46869 25
369314dc 26You should have received a copy of the GNU General Public License
9ec0b715 27along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 28
68c45bf0 29#include <config.h>
68c45bf0 30
f4b1eb36 31#include <errno.h>
4ed46869 32#include <stdio.h>
3263d5a2 33#include <unistd.h>
726929c4 34#include <limits.h>
4ed46869 35#include <sys/types.h>
620f13b0 36#include <c-ctype.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869 43
04c2f2c5 44/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 45
3263d5a2 46 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 47 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
4ed46869 51
3263d5a2
KH
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
04c2f2c5 55 charset_table as a struct charset.
4ed46869 56
3263d5a2 57*/
4ed46869 58
3263d5a2
KH
59/* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61Lisp_Object Vcharset_hash_table;
4ed46869 62
3263d5a2
KH
63/* Table of struct charset. */
64struct charset *charset_table;
4ed46869 65
0065d054 66static ptrdiff_t charset_table_size;
5af5dd92 67static int charset_table_used;
4ed46869 68
3263d5a2 69Lisp_Object Qcharsetp;
4ed46869 70
3263d5a2
KH
71/* Special charset symbols. */
72Lisp_Object Qascii;
955cbe7b
PE
73static Lisp_Object Qeight_bit;
74static Lisp_Object Qiso_8859_1;
75static Lisp_Object Qunicode;
76static Lisp_Object Qemacs;
4ed46869 77
3263d5a2
KH
78/* The corresponding charsets. */
79int charset_ascii;
2fe1edd1 80int charset_eight_bit;
a2cb4e63 81static int charset_iso_8859_1;
3263d5a2 82int charset_unicode;
a2cb4e63 83static int charset_emacs;
b0e3cf2b 84
7c7dceee
KH
85/* The other special charsets. */
86int charset_jisx0201_roman;
87int charset_jisx0208_1978;
88int charset_jisx0208;
d32320c4 89int charset_ksc5601;
c1a08b4c 90
3263d5a2 91/* Value of charset attribute `charset-iso-plane'. */
955cbe7b 92static Lisp_Object Qgl, Qgr;
c1a08b4c 93
d1a04588
KH
94/* Charset of unibyte characters. */
95int charset_unibyte;
4ed46869 96
3263d5a2
KH
97/* List of charsets ordered by the priority. */
98Lisp_Object Vcharset_ordered_list;
4ed46869 99
6a9c90ec
KH
100/* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102Lisp_Object Vcharset_non_preferred_head;
103
dbbb237d 104/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 105 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
106 -1. */
107unsigned short charset_ordered_list_tick;
4ed46869 108
3263d5a2
KH
109/* List of iso-2022 charsets. */
110Lisp_Object Viso_2022_charset_list;
35e623fb 111
3263d5a2
KH
112/* List of emacs-mule charsets. */
113Lisp_Object Vemacs_mule_charset_list;
114
b84ae584 115int emacs_mule_charset[256];
4ed46869
KH
116
117/* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
119int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
69f8de5b
KH
121#define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
3c7649c1 123 ? (int) ((code) - (charset)->min_code) \
69f8de5b
KH
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
3c7649c1
PE
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
3263d5a2
KH
136 : -1)
137
138
3c7649c1
PE
139/* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
3263d5a2 142
820ee249
KH
143#define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
150 << 8) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
153 << 16) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
155 << 24))))
4cf9710d 156
ecca2aad
KH
157/* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
8a73a704 159
ecca2aad
KH
160static struct
161{
162 /* The current charset for which the following tables are setup. */
163 struct charset *current;
164
165 /* 1 iff the following table is used for encoder. */
166 short for_encoder;
167
0b381c7e
JB
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
ecca2aad
KH
170 int min_char, max_char;
171
e4769531 172 /* A Unicode character corresponding to the code index 0 (i.e. the
ecca2aad 173 minimum code-point) of the current charset, or -1 if the code
e4769531 174 index 0 is not a Unicode character. This is checked when
ecca2aad
KH
175 table.encoder[CHAR] is zero. */
176 int zero_index_char;
177
178 union {
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder[0x20000];
189 } table;
190} *temp_charset_work;
191
192#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
193 do { \
194 if ((CODE) == 0) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
198 else \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
200 } while (0)
201
202#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
208
209#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
211
212#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
046b1f03 214\f
93bcb785 215
e9ce014c
KH
216/* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
d5172d4f 218bool charset_map_loaded;
4ed46869 219
e9ce014c 220struct charset_map_entries
4ed46869 221{
e9ce014c
KH
222 struct {
223 unsigned from, to;
224 int c;
225 } entry[0x10000];
226 struct charset_map_entries *next;
227};
228
ecca2aad
KH
229/* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
232
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
235
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
4cf9710d 238
ecca2aad
KH
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 244
ecca2aad 245 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 246
ecca2aad
KH
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
252*/
4ed46869 253
3263d5a2 254static void
971de7fb 255load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 256{
89ef49df 257 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
3263d5a2 258 unsigned max_code = CHARSET_MAX_CODE (charset);
d5172d4f 259 bool ascii_compatible_p = charset->ascii_compatible_p;
3263d5a2 260 int min_char, max_char, nonascii_min_char;
3263d5a2 261 int i;
3263d5a2 262 unsigned char *fast_map = charset->fast_map;
99529c2c 263
e9ce014c
KH
264 if (n_entries <= 0)
265 return;
266
ecca2aad 267 if (control_flag)
8ac5a9cc 268 {
ecca2aad
KH
269 if (! inhibit_load_charset_map)
270 {
271 if (control_flag == 1)
272 {
273 if (charset->method == CHARSET_METHOD_MAP)
274 {
275 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 276
4939150c
PE
277 vec = Fmake_vector (make_number (n), make_number (-1));
278 set_charset_attr (charset, charset_decoder, vec);
ecca2aad
KH
279 }
280 else
281 {
282 char_table_set_range (Vchar_unify_table,
283 charset->min_char, charset->max_char,
284 Qnil);
285 }
286 }
287 else
288 {
289 table = Fmake_char_table (Qnil, Qnil);
4939150c
PE
290 set_charset_attr (charset,
291 (charset->method == CHARSET_METHOD_MAP
292 ? charset_encoder : charset_deunifier),
293 table);
ecca2aad
KH
294 }
295 }
296 else
297 {
298 if (! temp_charset_work)
38182d90 299 temp_charset_work = xmalloc (sizeof *temp_charset_work);
ecca2aad
KH
300 if (control_flag == 1)
301 {
302 memset (temp_charset_work->table.decoder, -1,
303 sizeof (int) * 0x10000);
ecca2aad
KH
304 }
305 else
306 {
307 memset (temp_charset_work->table.encoder, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work->zero_index_char = -1;
310 }
311 temp_charset_work->current = charset;
312 temp_charset_work->for_encoder = (control_flag == 2);
313 control_flag += 2;
314 }
3263d5a2 315 charset_map_loaded = 1;
2e344af3 316 }
6662e69b 317
e9ce014c 318 min_char = max_char = entries->entry[0].c;
3263d5a2 319 nonascii_min_char = MAX_CHAR;
e9ce014c 320 for (i = 0; i < n_entries; i++)
2e344af3 321 {
e9ce014c 322 unsigned from, to;
3befa583 323 int from_index, to_index, lim_index;
3b4f4446 324 int from_c, to_c;
e9ce014c 325 int idx = i % 0x10000;
3263d5a2 326
e9ce014c
KH
327 if (i > 0 && idx == 0)
328 entries = entries->next;
329 from = entries->entry[idx].from;
330 to = entries->entry[idx].to;
3b4f4446
KH
331 from_c = entries->entry[idx].c;
332 from_index = CODE_POINT_TO_INDEX (charset, from);
333 if (from == to)
6662e69b 334 {
3b4f4446
KH
335 to_index = from_index;
336 to_c = from_c;
6662e69b 337 }
3b4f4446 338 else
6662e69b 339 {
3b4f4446
KH
340 to_index = CODE_POINT_TO_INDEX (charset, to);
341 to_c = from_c + (to_index - from_index);
6662e69b 342 }
3b4f4446
KH
343 if (from_index < 0 || to_index < 0)
344 continue;
3befa583 345 lim_index = to_index + 1;
3263d5a2 346
ecca2aad
KH
347 if (to_c > max_char)
348 max_char = to_c;
349 else if (from_c < min_char)
350 min_char = from_c;
6662e69b 351
ecca2aad
KH
352 if (control_flag == 1)
353 {
354 if (charset->method == CHARSET_METHOD_MAP)
3befa583 355 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
356 ASET (vec, from_index, make_number (from_c));
357 else
3befa583 358 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
359 CHAR_TABLE_SET (Vchar_unify_table,
360 CHARSET_CODE_OFFSET (charset) + from_index,
361 make_number (from_c));
362 }
363 else if (control_flag == 2)
364 {
365 if (charset->method == CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset))
3befa583 367 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad 368 {
3c7649c1
PE
369 unsigned code = from_index;
370 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad
KH
371
372 if (NILP (CHAR_TABLE_REF (table, from_c)))
373 CHAR_TABLE_SET (table, from_c, make_number (code));
374 }
375 else
3befa583 376 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
377 {
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (from_index));
380 }
381 }
382 else if (control_flag == 3)
3befa583 383 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
384 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
385 else if (control_flag == 4)
3befa583 386 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
388 else /* control_flag == 0 */
389 {
3b4f4446
KH
390 if (ascii_compatible_p)
391 {
200fc949 392 if (! ASCII_CHAR_P (from_c))
3b4f4446
KH
393 {
394 if (from_c < nonascii_min_char)
395 nonascii_min_char = from_c;
396 }
200fc949 397 else if (! ASCII_CHAR_P (to_c))
3b4f4446
KH
398 {
399 nonascii_min_char = 0x80;
400 }
401 }
177c0ea7 402
ecca2aad
KH
403 for (; from_c <= to_c; from_c++)
404 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 405 }
8ac5a9cc 406 }
3263d5a2 407
ecca2aad 408 if (control_flag == 0)
4ed46869 409 {
3263d5a2
KH
410 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
411 ? nonascii_min_char : min_char);
412 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 413 }
ecca2aad
KH
414 else if (control_flag == 4)
415 {
416 temp_charset_work->min_char = min_char;
417 temp_charset_work->max_char = max_char;
418 }
4ed46869
KH
419}
420
12bcae05 421
3263d5a2 422/* Read a hexadecimal number (preceded by "0x") from the file FP while
3b59c351 423 paying attention to comment character '#'. */
12bcae05 424
b0ab8123 425static unsigned
d5172d4f 426read_hex (FILE *fp, bool *eof, bool *overflow)
12bcae05 427{
3263d5a2
KH
428 int c;
429 unsigned n;
12bcae05 430
3263d5a2
KH
431 while ((c = getc (fp)) != EOF)
432 {
69f8de5b 433 if (c == '#')
3263d5a2
KH
434 {
435 while ((c = getc (fp)) != EOF && c != '\n');
436 }
437 else if (c == '0')
438 {
439 if ((c = getc (fp)) == EOF || c == 'x')
440 break;
441 }
8f924df7 442 }
3263d5a2
KH
443 if (c == EOF)
444 {
445 *eof = 1;
446 return 0;
447 }
3263d5a2 448 n = 0;
620f13b0 449 while (c_isxdigit (c = getc (fp)))
caeeedc1
PE
450 {
451 if (UINT_MAX >> 4 < n)
452 *overflow = 1;
3263d5a2 453 n = ((n << 4)
caeeedc1
PE
454 | (c - ('0' <= c && c <= '9' ? '0'
455 : 'A' <= c && c <= 'F' ? 'A' - 10
456 : 'a' - 10)));
457 }
e9ce014c
KH
458 if (c != EOF)
459 ungetc (c, fp);
3263d5a2
KH
460 return n;
461}
12bcae05 462
3263d5a2 463/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
464 Each line of MAPFILE has this form
465 0xAAAA 0xCCCC
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
468 0xAAAA-0xBBBB 0xCCCC
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
4ed46869 471
3263d5a2
KH
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 474 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
475 range.
476
18a10a21
JB
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 479
e9ce014c 480static void
f4b1eb36
PE
481load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
482 int control_flag)
4ed46869 483{
e9ce014c
KH
484 unsigned min_code = CHARSET_MIN_CODE (charset);
485 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2 486 int fd;
5d1922cd 487 FILE *fp = NULL;
3263d5a2 488 Lisp_Object suffixes;
e9ce014c 489 struct charset_map_entries *head, *entries;
d311d28c
PE
490 int n_entries;
491 ptrdiff_t count;
4ed46869 492
6c6f1994 493 suffixes = list2 (build_string (".map"), build_string (".TXT"));
4ed46869 494
2bfa3d3e 495 dynwind_begin ();
5d1922cd 496 record_unwind_protect_ptr (fclose_ptr_unwind, &fp);
9f62b5dd 497 {
2bfa3d3e 498 dynwind_begin ();
9f62b5dd
BT
499 specbind (Qfile_name_handler_alist, Qnil);
500 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
501 fp = fd < 0 ? 0 : fdopen (fd, "r");
502 if (!fp)
503 {
504 int open_errno = errno;
505 emacs_close (fd);
506 report_file_errno ("Loading charset map", mapfile, open_errno);
507 }
2bfa3d3e 508 dynwind_end ();
9f62b5dd 509 }
4ed46869 510
e96be6dd
BT
511 /* Use record, as `charset_map_entries' is large (larger than
512 MAX_ALLOCA). */
513 head = xmalloc (sizeof *head);
a2f3eb19 514 entries = head;
72af86bd 515 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 516
e9ce014c 517 n_entries = 0;
3263d5a2
KH
518 while (1)
519 {
caeeedc1 520 unsigned from, to, c;
e9ce014c 521 int idx;
d5172d4f 522 bool eof = 0, overflow = 0;
4ed46869 523
caeeedc1 524 from = read_hex (fp, &eof, &overflow);
3263d5a2
KH
525 if (eof)
526 break;
e9ce014c 527 if (getc (fp) == '-')
caeeedc1 528 to = read_hex (fp, &eof, &overflow);
e9ce014c
KH
529 else
530 to = from;
caeeedc1
PE
531 if (eof)
532 break;
533 c = read_hex (fp, &eof, &overflow);
534 if (eof)
535 break;
ac4137cc 536
caeeedc1
PE
537 if (overflow)
538 continue;
e9ce014c
KH
539 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
540 continue;
ac4137cc 541
f4b1eb36 542 if (n_entries == 0x10000)
3263d5a2 543 {
e96be6dd 544 entries->next = xmalloc (sizeof *entries->next);
e9ce014c 545 entries = entries->next;
72af86bd 546 memset (entries, 0, sizeof (struct charset_map_entries));
d311d28c 547 n_entries = 0;
3263d5a2 548 }
d311d28c 549 idx = n_entries;
e9ce014c
KH
550 entries->entry[idx].from = from;
551 entries->entry[idx].to = to;
552 entries->entry[idx].c = c;
553 n_entries++;
3263d5a2
KH
554 }
555 fclose (fp);
5d1922cd 556 fp = NULL;
177c0ea7 557
e9ce014c 558 load_charset_map (charset, head, n_entries, control_flag);
2bfa3d3e 559 dynwind_end ();
4ed46869
KH
560}
561
e9ce014c 562static void
971de7fb 563load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 564{
e9ce014c
KH
565 unsigned min_code = CHARSET_MIN_CODE (charset);
566 unsigned max_code = CHARSET_MAX_CODE (charset);
567 struct charset_map_entries *head, *entries;
568 int n_entries;
569 int len = ASIZE (vec);
570 int i;
a2f3eb19 571 USE_SAFE_ALLOCA;
23d2a7f1 572
e9ce014c 573 if (len % 2 == 1)
3263d5a2 574 {
e9ce014c
KH
575 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
576 return;
3263d5a2 577 }
35e623fb 578
a2f3eb19
CY
579 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
580 large (larger than MAX_ALLOCA). */
98c6f1e3 581 head = SAFE_ALLOCA (sizeof *head);
a2f3eb19 582 entries = head;
72af86bd 583 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 584
e9ce014c
KH
585 n_entries = 0;
586 for (i = 0; i < len; i += 2)
35e623fb 587 {
e9ce014c
KH
588 Lisp_Object val, val2;
589 unsigned from, to;
d311d28c 590 EMACS_INT c;
e9ce014c 591 int idx;
d2665018 592
e9ce014c
KH
593 val = AREF (vec, i);
594 if (CONSP (val))
bbf12bb3 595 {
e9ce014c
KH
596 val2 = XCDR (val);
597 val = XCAR (val);
e9ce014c
KH
598 from = XFASTINT (val);
599 to = XFASTINT (val2);
bbf12bb3 600 }
e9ce014c 601 else
d311d28c 602 from = to = XFASTINT (val);
e9ce014c
KH
603 val = AREF (vec, i + 1);
604 CHECK_NATNUM (val);
605 c = XFASTINT (val);
76d7b829 606
e9ce014c
KH
607 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
608 continue;
76d7b829 609
dcc694d7 610 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 611 {
98c6f1e3 612 entries->next = SAFE_ALLOCA (sizeof *entries->next);
e9ce014c 613 entries = entries->next;
72af86bd 614 memset (entries, 0, sizeof (struct charset_map_entries));
e9ce014c
KH
615 }
616 idx = n_entries % 0x10000;
617 entries->entry[idx].from = from;
618 entries->entry[idx].to = to;
619 entries->entry[idx].c = c;
620 n_entries++;
621 }
76d7b829 622
e9ce014c 623 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 624 SAFE_FREE ();
ac4137cc
KH
625}
626
ecca2aad
KH
627
628/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
629 map it is (see the comment of load_charset_map for the detail). */
630
3263d5a2 631static void
971de7fb 632load_charset (struct charset *charset, int control_flag)
76d7b829 633{
ecca2aad 634 Lisp_Object map;
76d7b829 635
ecca2aad
KH
636 if (inhibit_load_charset_map
637 && temp_charset_work
638 && charset == temp_charset_work->current
56f00ed2 639 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
640 return;
641
642 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
643 map = CHARSET_MAP (charset);
0ac2c299
PE
644 else
645 {
646 if (! CHARSET_UNIFIED_P (charset))
1088b922 647 emacs_abort ();
0ac2c299
PE
648 map = CHARSET_UNIFY_MAP (charset);
649 }
ecca2aad
KH
650 if (STRINGP (map))
651 load_charset_map_from_file (charset, map, control_flag);
652 else
653 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 654}
76d7b829 655
3263d5a2
KH
656
657DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
658 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
5842a27b 659 (Lisp_Object object)
23d2a7f1 660{
3263d5a2 661 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
662}
663
4ed46869 664
a2cb4e63
PE
665static void
666map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
667 Lisp_Object function, Lisp_Object arg,
668 unsigned int from, unsigned int to)
ecca2aad
KH
669{
670 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
671 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
672 Lisp_Object range;
673 int c, stop;
674 struct gcpro gcpro1;
675
676 range = Fcons (Qnil, Qnil);
677 GCPRO1 (range);
678
679 c = temp_charset_work->min_char;
680 stop = (temp_charset_work->max_char < 0x20000
681 ? temp_charset_work->max_char : 0xFFFF);
51b59d79 682
ecca2aad
KH
683 while (1)
684 {
f6095868 685 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
ecca2aad 686
f6095868 687 if (idx >= from_idx && idx <= to_idx)
ecca2aad
KH
688 {
689 if (NILP (XCAR (range)))
690 XSETCAR (range, make_number (c));
691 }
692 else if (! NILP (XCAR (range)))
693 {
694 XSETCDR (range, make_number (c - 1));
695 if (c_function)
696 (*c_function) (arg, range);
697 else
698 call2 (function, range, arg);
699 XSETCAR (range, Qnil);
700 }
701 if (c == stop)
702 {
703 if (c == temp_charset_work->max_char)
704 {
705 if (! NILP (XCAR (range)))
706 {
707 XSETCDR (range, make_number (c));
708 if (c_function)
709 (*c_function) (arg, range);
710 else
711 call2 (function, range, arg);
712 }
713 break;
714 }
715 c = 0x1FFFF;
716 stop = temp_charset_work->max_char;
717 }
718 c++;
719 }
c542407d 720 UNGCPRO;
ecca2aad
KH
721}
722
4ed46869 723void
6f704c76
DN
724map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
725 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
4ed46869 726{
3263d5a2 727 Lisp_Object range;
d5172d4f
PE
728 bool partial = (from > CHARSET_MIN_CODE (charset)
729 || to < CHARSET_MAX_CODE (charset));
374c5cfd 730
3263d5a2 731 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 732 {
374c5cfd
KH
733 int from_idx = CODE_POINT_TO_INDEX (charset, from);
734 int to_idx = CODE_POINT_TO_INDEX (charset, to);
735 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
736 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
737
ecca2aad
KH
738 if (CHARSET_UNIFIED_P (charset))
739 {
740 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
741 load_charset (charset, 2);
742 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
743 map_char_table_for_charset (c_function, function,
744 CHARSET_DEUNIFIER (charset), arg,
745 partial ? charset : NULL, from, to);
746 else
747 map_charset_for_dump (c_function, function, arg, from, to);
748 }
749
374c5cfd 750 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 751 if (NILP (function))
5af5dd92 752 (*c_function) (arg, range);
3263d5a2
KH
753 else
754 call2 (function, range, arg);
c83ef371 755 }
3263d5a2
KH
756 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
757 {
758 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
759 load_charset (charset, 2);
760 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
761 map_char_table_for_charset (c_function, function,
762 CHARSET_ENCODER (charset), arg,
763 partial ? charset : NULL, from, to);
764 else
765 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 766 }
374c5cfd 767 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 768 {
374c5cfd
KH
769 Lisp_Object subset_info;
770 int offset;
771
772 subset_info = CHARSET_SUBSET (charset);
773 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
774 offset = XINT (AREF (subset_info, 3));
775 from -= offset;
776 if (from < XFASTINT (AREF (subset_info, 1)))
777 from = XFASTINT (AREF (subset_info, 1));
778 to -= offset;
779 if (to > XFASTINT (AREF (subset_info, 2)))
780 to = XFASTINT (AREF (subset_info, 2));
781 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 782 }
374c5cfd
KH
783 else /* i.e. CHARSET_METHOD_SUPERSET */
784 {
785 Lisp_Object parents;
4ed46869 786
374c5cfd
KH
787 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
788 parents = XCDR (parents))
bbf12bb3 789 {
374c5cfd
KH
790 int offset;
791 unsigned this_from, this_to;
792
793 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
794 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
795 this_from = from > offset ? from - offset : 0;
796 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
797 if (this_from < CHARSET_MIN_CODE (charset))
798 this_from = CHARSET_MIN_CODE (charset);
799 if (this_to > CHARSET_MAX_CODE (charset))
800 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
801 map_charset_chars (c_function, function, arg, charset,
802 this_from, this_to);
bbf12bb3 803 }
35e623fb 804 }
4ed46869
KH
805}
806
374c5cfd 807DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 808 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 809FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 810argument ARG.
4ed46869 811
374c5cfd
KH
812RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
813characters contained in CHARSET.
4ed46869 814
374c5cfd 815The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 816range of code points (in CHARSET) of target characters. */)
5842a27b 817 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
4ed46869 818{
374c5cfd 819 struct charset *cs;
16fed1fc 820 unsigned from, to;
4ed46869 821
374c5cfd
KH
822 CHECK_CHARSET_GET_CHARSET (charset, cs);
823 if (NILP (from_code))
16fed1fc 824 from = CHARSET_MIN_CODE (cs);
970b7474 825 else
4ed46869 826 {
970b7474
KH
827 from = XINT (from_code);
828 if (from < CHARSET_MIN_CODE (cs))
829 from = CHARSET_MIN_CODE (cs);
4ed46869 830 }
374c5cfd 831 if (NILP (to_code))
970b7474 832 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
833 else
834 {
970b7474
KH
835 to = XINT (to_code);
836 if (to > CHARSET_MAX_CODE (cs))
837 to = CHARSET_MAX_CODE (cs);
4ed46869 838 }
16fed1fc 839 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 840 return Qnil;
35e623fb 841}
4ed46869 842
4ed46869 843
3263d5a2
KH
844/* Define a charset according to the arguments. The Nth argument is
845 the Nth attribute of the charset (the last attribute `charset-id'
846 is not included). See the docstring of `define-charset' for the
847 detail. */
4ed46869 848
3263d5a2
KH
849DEFUN ("define-charset-internal", Fdefine_charset_internal,
850 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
851 doc: /* For internal use only.
852usage: (define-charset-internal ...) */)
f66c7cf8 853 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 854{
3263d5a2
KH
855 /* Charset attr vector. */
856 Lisp_Object attrs;
857 Lisp_Object val;
0de4bb68 858 EMACS_UINT hash_code;
3263d5a2 859 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 860 int i, j;
3263d5a2
KH
861 struct charset charset;
862 int id;
863 int dimension;
d5172d4f 864 bool new_definition_p;
3263d5a2
KH
865 int nchars;
866
867 if (nargs != charset_arg_max)
868 return Fsignal (Qwrong_number_of_arguments,
869 Fcons (intern ("define-charset-internal"),
870 make_number (nargs)));
871
872 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
873
874 CHECK_SYMBOL (args[charset_arg_name]);
875 ASET (attrs, charset_name, args[charset_arg_name]);
876
877 val = args[charset_arg_code_space];
c032b5f8 878 for (i = 0, dimension = 0, nchars = 1; ; i++)
76d7b829 879 {
d311d28c 880 Lisp_Object min_byte_obj, max_byte_obj;
3263d5a2
KH
881 int min_byte, max_byte;
882
d311d28c
PE
883 min_byte_obj = Faref (val, make_number (i * 2));
884 max_byte_obj = Faref (val, make_number (i * 2 + 1));
af5a5a98 885 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
d311d28c 886 min_byte = XINT (min_byte_obj);
af5a5a98 887 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
d311d28c 888 max_byte = XINT (max_byte_obj);
3263d5a2
KH
889 charset.code_space[i * 4] = min_byte;
890 charset.code_space[i * 4 + 1] = max_byte;
891 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
3263d5a2
KH
892 if (max_byte > 0)
893 dimension = i + 1;
c032b5f8
PE
894 if (i == 3)
895 break;
896 nchars *= charset.code_space[i * 4 + 2];
897 charset.code_space[i * 4 + 3] = nchars;
3263d5a2 898 }
4ed46869 899
3263d5a2
KH
900 val = args[charset_arg_dimension];
901 if (NILP (val))
902 charset.dimension = dimension;
903 else
4ed46869 904 {
af5a5a98 905 CHECK_RANGED_INTEGER (val, 1, 4);
3263d5a2 906 charset.dimension = XINT (val);
4ed46869
KH
907 }
908
3263d5a2
KH
909 charset.code_linear_p
910 = (charset.dimension == 1
911 || (charset.code_space[2] == 256
912 && (charset.dimension == 2
913 || (charset.code_space[6] == 256
914 && (charset.dimension == 3
915 || charset.code_space[10] == 256)))));
916
69f8de5b 917 if (! charset.code_linear_p)
4ed46869 918 {
c1ade6f7 919 charset.code_space_mask = xzalloc_atomic (256);
69f8de5b
KH
920 for (i = 0; i < 4; i++)
921 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
922 j++)
923 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
924 }
925
3263d5a2 926 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 927
3263d5a2
KH
928 charset.min_code = (charset.code_space[0]
929 | (charset.code_space[4] << 8)
930 | (charset.code_space[8] << 16)
3c7649c1 931 | ((unsigned) charset.code_space[12] << 24));
3263d5a2
KH
932 charset.max_code = (charset.code_space[1]
933 | (charset.code_space[5] << 8)
934 | (charset.code_space[9] << 16)
3c7649c1 935 | ((unsigned) charset.code_space[13] << 24));
820ee249 936 charset.char_index_offset = 0;
8a73a704 937
820ee249
KH
938 val = args[charset_arg_min_code];
939 if (! NILP (val))
940 {
be44ca6c 941 unsigned code = cons_to_unsigned (val, UINT_MAX);
fdb82f93 942
820ee249
KH
943 if (code < charset.min_code
944 || code > charset.max_code)
3c7649c1
PE
945 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
946 make_fixnum_or_float (charset.max_code), val);
820ee249
KH
947 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
948 charset.min_code = code;
949 }
3fac5a51 950
820ee249
KH
951 val = args[charset_arg_max_code];
952 if (! NILP (val))
3fac5a51 953 {
be44ca6c 954 unsigned code = cons_to_unsigned (val, UINT_MAX);
820ee249 955
820ee249
KH
956 if (code < charset.min_code
957 || code > charset.max_code)
3c7649c1
PE
958 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
959 make_fixnum_or_float (charset.max_code), val);
820ee249 960 charset.max_code = code;
3fac5a51 961 }
3fac5a51 962
ecca2aad 963 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 964
3263d5a2
KH
965 val = args[charset_arg_invalid_code];
966 if (NILP (val))
967 {
968 if (charset.min_code > 0)
969 charset.invalid_code = 0;
bbf12bb3
KH
970 else
971 {
3c7649c1 972 if (charset.max_code < UINT_MAX)
3263d5a2
KH
973 charset.invalid_code = charset.max_code + 1;
974 else
975 error ("Attribute :invalid-code must be specified");
76d7b829 976 }
76d7b829 977 }
3263d5a2 978 else
3c7649c1 979 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
4ed46869 980
3263d5a2
KH
981 val = args[charset_arg_iso_final];
982 if (NILP (val))
983 charset.iso_final = -1;
984 else
985 {
986 CHECK_NUMBER (val);
987 if (XINT (val) < '0' || XINT (val) > 127)
c2982e87 988 error ("Invalid iso-final-char: %"pI"d", XINT (val));
3263d5a2
KH
989 charset.iso_final = XINT (val);
990 }
4ed46869 991
3263d5a2
KH
992 val = args[charset_arg_iso_revision];
993 if (NILP (val))
994 charset.iso_revision = -1;
995 else
4ed46869 996 {
af5a5a98 997 CHECK_RANGED_INTEGER (val, -1, 63);
3263d5a2 998 charset.iso_revision = XINT (val);
4ed46869 999 }
4ed46869 1000
3263d5a2
KH
1001 val = args[charset_arg_emacs_mule_id];
1002 if (NILP (val))
1003 charset.emacs_mule_id = -1;
4ed46869
KH
1004 else
1005 {
3263d5a2
KH
1006 CHECK_NATNUM (val);
1007 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
c2982e87 1008 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
3263d5a2 1009 charset.emacs_mule_id = XINT (val);
c83ef371 1010 }
f6302ac9 1011
3263d5a2 1012 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1013
3263d5a2 1014 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1015
3263d5a2
KH
1016 charset.unified_p = 0;
1017
72af86bd 1018 memset (charset.fast_map, 0, sizeof (charset.fast_map));
3263d5a2
KH
1019
1020 if (! NILP (args[charset_arg_code_offset]))
1021 {
1022 val = args[charset_arg_code_offset];
3c7649c1 1023 CHECK_CHARACTER (val);
3263d5a2
KH
1024
1025 charset.method = CHARSET_METHOD_OFFSET;
1026 charset.code_offset = XINT (val);
1027
3263d5a2 1028 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
3c7649c1 1029 if (MAX_CHAR - charset.code_offset < i)
3263d5a2 1030 error ("Unsupported max char: %d", charset.max_char);
3c7649c1
PE
1031 charset.max_char = i + charset.code_offset;
1032 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1033 charset.min_char = i + charset.code_offset;
3263d5a2 1034
f148205f
KH
1035 i = (charset.min_char >> 7) << 7;
1036 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1037 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1038 i = (i >> 12) << 12;
3263d5a2
KH
1039 for (; i <= charset.max_char; i += 0x1000)
1040 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1041 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1042 charset.ascii_compatible_p = 1;
3263d5a2
KH
1043 }
1044 else if (! NILP (args[charset_arg_map]))
1045 {
1046 val = args[charset_arg_map];
1047 ASET (attrs, charset_map, val);
ecca2aad 1048 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1049 }
374c5cfd 1050 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1051 {
374c5cfd
KH
1052 Lisp_Object parent;
1053 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1054 struct charset *parent_charset;
1055
1056 val = args[charset_arg_subset];
1057 parent = Fcar (val);
1058 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1059 parent_min_code = Fnth (make_number (1), val);
1060 CHECK_NATNUM (parent_min_code);
1061 parent_max_code = Fnth (make_number (2), val);
1062 CHECK_NATNUM (parent_max_code);
1063 parent_code_offset = Fnth (make_number (3), val);
1064 CHECK_NUMBER (parent_code_offset);
25721f5b 1065 val = make_uninit_vector (4);
374c5cfd
KH
1066 ASET (val, 0, make_number (parent_charset->id));
1067 ASET (val, 1, parent_min_code);
1068 ASET (val, 2, parent_max_code);
1069 ASET (val, 3, parent_code_offset);
1070 ASET (attrs, charset_subset, val);
1071
1072 charset.method = CHARSET_METHOD_SUBSET;
1073 /* Here, we just copy the parent's fast_map. It's not accurate,
1074 but at least it works for quickly detecting which character
1075 DOESN'T belong to this charset. */
1076 for (i = 0; i < 190; i++)
1077 charset.fast_map[i] = parent_charset->fast_map[i];
1078
1079 /* We also copy these for parents. */
1080 charset.min_char = parent_charset->min_char;
1081 charset.max_char = parent_charset->max_char;
1082 }
1083 else if (! NILP (args[charset_arg_superset]))
0282eb69 1084 {
374c5cfd
KH
1085 val = args[charset_arg_superset];
1086 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1087 val = Fcopy_sequence (val);
374c5cfd 1088 ASET (attrs, charset_superset, val);
3263d5a2
KH
1089
1090 charset.min_char = MAX_CHAR;
1091 charset.max_char = 0;
1092 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1093 {
3263d5a2
KH
1094 Lisp_Object elt, car_part, cdr_part;
1095 int this_id, offset;
1096 struct charset *this_charset;
2e344af3 1097
3263d5a2
KH
1098 elt = Fcar (val);
1099 if (CONSP (elt))
2e344af3 1100 {
3263d5a2
KH
1101 car_part = XCAR (elt);
1102 cdr_part = XCDR (elt);
1103 CHECK_CHARSET_GET_ID (car_part, this_id);
d311d28c 1104 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
3263d5a2 1105 offset = XINT (cdr_part);
177c0ea7 1106 }
3263d5a2 1107 else
4ed46869 1108 {
3263d5a2
KH
1109 CHECK_CHARSET_GET_ID (elt, this_id);
1110 offset = 0;
4ed46869 1111 }
3263d5a2
KH
1112 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1113
1114 this_charset = CHARSET_FROM_ID (this_id);
1115 if (charset.min_char > this_charset->min_char)
1116 charset.min_char = this_charset->min_char;
1117 if (charset.max_char < this_charset->max_char)
1118 charset.max_char = this_charset->max_char;
1119 for (i = 0; i < 190; i++)
1120 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1121 }
0282eb69 1122 }
2e344af3 1123 else
3263d5a2 1124 error ("None of :code-offset, :map, :parents are specified");
05505664 1125
3263d5a2
KH
1126 val = args[charset_arg_unify_map];
1127 if (! NILP (val) && !STRINGP (val))
1128 CHECK_VECTOR (val);
1129 ASET (attrs, charset_unify_map, val);
05505664 1130
3263d5a2
KH
1131 CHECK_LIST (args[charset_arg_plist]);
1132 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1133
3263d5a2
KH
1134 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1135 &hash_code);
1136 if (charset.hash_index >= 0)
1137 {
1138 new_definition_p = 0;
4f65af01 1139 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
e83064be 1140 set_hash_value_slot (hash_table, charset.hash_index, attrs);
3263d5a2 1141 }
1a45ff10 1142 else
3263d5a2
KH
1143 {
1144 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1145 hash_code);
1146 if (charset_table_used == charset_table_size)
1147 {
e097a6fa 1148 /* Ensure that charset IDs fit into 'int' as well as into the
0065d054
PE
1149 restriction imposed by fixnums. Although the 'int' restriction
1150 could be removed, too much other code would need altering; for
1151 example, the IDs are stuffed into struct
1152 coding_system.charbuf[i] entries, which are 'int'. */
1153 int old_size = charset_table_size;
2dd2e622 1154 ptrdiff_t new_size = old_size;
0065d054 1155 struct charset *new_table =
2dd2e622 1156 xpalloc (0, &new_size, 1,
0065d054
PE
1157 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1158 sizeof *charset_table);
1159 memcpy (new_table, charset_table, old_size * sizeof *new_table);
2fe1edd1 1160 charset_table = new_table;
2dd2e622 1161 charset_table_size = new_size;
f701dc2a
PE
1162 /* FIXME: This leaks memory, as the old charset_table becomes
1163 unreachable. If the old charset table is charset_table_init
1164 then this leak is intentional; otherwise, it's unclear.
1165 If the latter memory leak is intentional, a
0065d054
PE
1166 comment should be added to explain this. If not, the old
1167 charset_table should be freed, by passing it as the 1st argument
1168 to xpalloc and removing the memcpy. */
3263d5a2
KH
1169 }
1170 id = charset_table_used++;
3263d5a2
KH
1171 new_definition_p = 1;
1172 }
2e344af3 1173
4f65af01 1174 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1175 charset.id = id;
1176 charset_table[id] = charset;
2e344af3 1177
ecca2aad 1178 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1179 {
1180 load_charset (&charset, 0);
1181 charset_table[id] = charset;
1182 }
ecca2aad 1183
3263d5a2 1184 if (charset.iso_final >= 0)
4ed46869 1185 {
3263d5a2
KH
1186 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1187 charset.iso_final) = id;
1188 if (new_definition_p)
1189 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
6c6f1994 1190 list1 (make_number (id)));
7c7dceee
KH
1191 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1192 charset_jisx0201_roman = id;
1193 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1194 charset_jisx0208_1978 = id;
1195 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1196 charset_jisx0208 = id;
d32320c4
KH
1197 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1198 charset_ksc5601 = id;
4ed46869 1199 }
d0cf2d48 1200
3263d5a2 1201 if (charset.emacs_mule_id >= 0)
4ed46869 1202 {
b84ae584 1203 emacs_mule_charset[charset.emacs_mule_id] = id;
4f65af01
KH
1204 if (charset.emacs_mule_id < 0xA0)
1205 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1206 else
1207 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1208 if (new_definition_p)
1209 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
6c6f1994 1210 list1 (make_number (id)));
4ed46869
KH
1211 }
1212
3263d5a2
KH
1213 if (new_definition_p)
1214 {
1215 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1216 if (charset.supplementary_p)
1217 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
6c6f1994 1218 list1 (make_number (id)));
8055c66a 1219 else
880820fe
KH
1220 {
1221 Lisp_Object tail;
1222
1223 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1224 {
1225 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1226
1227 if (cs->supplementary_p)
1228 break;
1229 }
1230 if (EQ (tail, Vcharset_ordered_list))
1231 Vcharset_ordered_list = Fcons (make_number (id),
1232 Vcharset_ordered_list);
1233 else if (NILP (tail))
1234 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
6c6f1994 1235 list1 (make_number (id)));
880820fe
KH
1236 else
1237 {
1238 val = Fcons (XCAR (tail), XCDR (tail));
1239 XSETCDR (tail, val);
1240 XSETCAR (tail, make_number (id));
1241 }
1242 }
dbbb237d 1243 charset_ordered_list_tick++;
3263d5a2 1244 }
4ed46869 1245
3263d5a2 1246 return Qnil;
4ed46869
KH
1247}
1248
2fe1edd1
KH
1249
1250/* Same as Fdefine_charset_internal but arguments are more convenient
1251 to call from C (typically in syms_of_charset). This can define a
1252 charset of `offset' method only. Return the ID of the new
1253 charset. */
1254
1255static int
dd4c5104
DN
1256define_charset_internal (Lisp_Object name,
1257 int dimension,
dfb6afda 1258 const char *code_space_chars,
dd4c5104
DN
1259 unsigned min_code, unsigned max_code,
1260 int iso_final, int iso_revision, int emacs_mule_id,
d5172d4f 1261 bool ascii_compatible, bool supplementary,
dd4c5104 1262 int code_offset)
2fe1edd1 1263{
dfb6afda 1264 const unsigned char *code_space = (const unsigned char *) code_space_chars;
2fe1edd1 1265 Lisp_Object args[charset_arg_max];
2fe1edd1
KH
1266 Lisp_Object val;
1267 int i;
1268
1269 args[charset_arg_name] = name;
1270 args[charset_arg_dimension] = make_number (dimension);
25721f5b 1271 val = make_uninit_vector (8);
2fe1edd1
KH
1272 for (i = 0; i < 8; i++)
1273 ASET (val, i, make_number (code_space[i]));
1274 args[charset_arg_code_space] = val;
1275 args[charset_arg_min_code] = make_number (min_code);
1276 args[charset_arg_max_code] = make_number (max_code);
1277 args[charset_arg_iso_final]
1278 = (iso_final < 0 ? Qnil : make_number (iso_final));
1279 args[charset_arg_iso_revision] = make_number (iso_revision);
1280 args[charset_arg_emacs_mule_id]
1281 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1282 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1283 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1284 args[charset_arg_invalid_code] = Qnil;
1285 args[charset_arg_code_offset] = make_number (code_offset);
1286 args[charset_arg_map] = Qnil;
1287 args[charset_arg_subset] = Qnil;
1288 args[charset_arg_superset] = Qnil;
1289 args[charset_arg_unify_map] = Qnil;
1290
694b6c97 1291 args[charset_arg_plist] =
3438fe21 1292 listn (CONSTYPE_HEAP, 14,
694b6c97
DA
1293 intern_c_string (":name"),
1294 args[charset_arg_name],
1295 intern_c_string (":dimension"),
1296 args[charset_arg_dimension],
1297 intern_c_string (":code-space"),
1298 args[charset_arg_code_space],
1299 intern_c_string (":iso-final-char"),
1300 args[charset_arg_iso_final],
1301 intern_c_string (":emacs-mule-id"),
1302 args[charset_arg_emacs_mule_id],
1303 intern_c_string (":ascii-compatible-p"),
1304 args[charset_arg_ascii_compatible_p],
1305 intern_c_string (":code-offset"),
1306 args[charset_arg_code_offset]);
2fe1edd1
KH
1307 Fdefine_charset_internal (charset_arg_max, args);
1308
1309 return XINT (CHARSET_SYMBOL_ID (name));
1310}
1311
1312
3263d5a2
KH
1313DEFUN ("define-charset-alias", Fdefine_charset_alias,
1314 Sdefine_charset_alias, 2, 2, 0,
1315 doc: /* Define ALIAS as an alias for charset CHARSET. */)
5842a27b 1316 (Lisp_Object alias, Lisp_Object charset)
4ed46869 1317{
3263d5a2
KH
1318 Lisp_Object attr;
1319
1320 CHECK_CHARSET_GET_ATTR (charset, attr);
1321 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1322 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1323 return Qnil;
1324}
4ed46869 1325
4ed46869 1326
3263d5a2 1327DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1328 doc: /* Return the property list of CHARSET. */)
5842a27b 1329 (Lisp_Object charset)
3263d5a2
KH
1330{
1331 Lisp_Object attrs;
1332
1333 CHECK_CHARSET_GET_ATTR (charset, attrs);
1334 return CHARSET_ATTR_PLIST (attrs);
1335}
1336
1337
1338DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1339 doc: /* Set CHARSET's property list to PLIST. */)
5842a27b 1340 (Lisp_Object charset, Lisp_Object plist)
3263d5a2
KH
1341{
1342 Lisp_Object attrs;
1343
1344 CHECK_CHARSET_GET_ATTR (charset, attrs);
4939150c 1345 ASET (attrs, charset_plist, plist);
3263d5a2
KH
1346 return plist;
1347}
1348
1349
dbbb237d 1350DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1351 doc: /* Unify characters of CHARSET with Unicode.
1352This means reading the relevant file and installing the table defined
dbbb237d
KH
1353by CHARSET's `:unify-map' property.
1354
64165ae2
DL
1355Optional second arg UNIFY-MAP is a file name string or a vector. It has
1356the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1357`define-charset' (which see).
1358
1359Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
5842a27b 1360 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
8a73a704 1361{
3263d5a2
KH
1362 int id;
1363 struct charset *cs;
8f924df7 1364
3263d5a2
KH
1365 CHECK_CHARSET_GET_ID (charset, id);
1366 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1367 if (NILP (deunify)
1368 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1369 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1370 return Qnil;
dbbb237d 1371
3263d5a2 1372 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1373 if (NILP (deunify))
1374 {
ecca2aad
KH
1375 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1376 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1377 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1378 if (NILP (unify_map))
1379 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1380 else
ecca2aad
KH
1381 {
1382 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1383 signal_error ("Bad unify-map", unify_map);
4939150c 1384 set_charset_attr (cs, charset_unify_map, unify_map);
ecca2aad
KH
1385 }
1386 if (NILP (Vchar_unify_table))
1387 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1388 char_table_set_range (Vchar_unify_table,
1389 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1390 CHARSET_UNIFIED_P (cs) = 1;
1391 }
1392 else if (CHAR_TABLE_P (Vchar_unify_table))
1393 {
3c7649c1
PE
1394 unsigned min_code = CHARSET_MIN_CODE (cs);
1395 unsigned max_code = CHARSET_MAX_CODE (cs);
dbbb237d
KH
1396 int min_char = DECODE_CHAR (cs, min_code);
1397 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1398
dbbb237d
KH
1399 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1400 }
8f924df7 1401
3263d5a2 1402 return Qnil;
8a73a704
KH
1403}
1404
3fac5a51
KH
1405DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1406 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1407 doc: /*
d0cf2d48 1408Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1409DIMENSION is the number of bytes to represent a character: 1 or 2.
1410CHARS is the number of characters in a dimension: 94 or 96.
1411
1412This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1413If there's no unused final char for the specified kind of charset,
fdb82f93 1414return nil. */)
5842a27b 1415 (Lisp_Object dimension, Lisp_Object chars)
3fac5a51
KH
1416{
1417 int final_char;
1418
b7826503
PJ
1419 CHECK_NUMBER (dimension);
1420 CHECK_NUMBER (chars);
3263d5a2
KH
1421 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1422 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1423 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1424 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1425 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1426 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1427 break;
3fac5a51
KH
1428 return (final_char <= '?' ? make_number (final_char) : Qnil);
1429}
1430
3263d5a2 1431static void
971de7fb 1432check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1433{
3263d5a2
KH
1434 CHECK_NATNUM (dimension);
1435 CHECK_NATNUM (chars);
e6c3da20 1436 CHECK_CHARACTER (final_char);
4ed46869 1437
3263d5a2 1438 if (XINT (dimension) > 3)
c2982e87 1439 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
5fdb398c 1440 XINT (dimension));
4ed46869 1441 if (XINT (chars) != 94 && XINT (chars) != 96)
c2982e87 1442 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
3263d5a2 1443 if (XINT (final_char) < '0' || XINT (final_char) > '~')
e6c3da20
EZ
1444 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1445 (int)XINT (final_char));
3263d5a2
KH
1446}
1447
1448
1449DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1450 4, 4, 0,
cefd8c4f
KH
1451 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1452
1453On decoding by an ISO-2022 base coding system, when a charset
1454specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1455if CHARSET is designated instead. */)
5842a27b 1456 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
3263d5a2
KH
1457{
1458 int id;
d5172d4f 1459 bool chars_flag;
4ed46869 1460
3263d5a2
KH
1461 CHECK_CHARSET_GET_ID (charset, id);
1462 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1463 chars_flag = XINT (chars) == 96;
1464 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1465 return Qnil;
1466}
1467
3263d5a2 1468
2e344af3
KH
1469/* Return information about charsets in the text at PTR of NBYTES
1470 bytes, which are NCHARS characters. The value is:
f6302ac9 1471
cfe34140 1472 0: Each character is represented by one byte. This is always
3263d5a2
KH
1473 true for a unibyte string. For a multibyte string, true if
1474 it contains only ASCII characters.
1475
28c026cd
DL
1476 1: No charsets other than ascii, control-1, and latin-1 are
1477 found.
1d67c29b 1478
3263d5a2
KH
1479 2: Otherwise.
1480*/
4ed46869
KH
1481
1482int
971de7fb 1483string_xstring_p (Lisp_Object string)
4ed46869 1484{
8f924df7
KH
1485 const unsigned char *p = SDATA (string);
1486 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1487
8f924df7 1488 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1489 return 0;
1490
3263d5a2 1491 while (p < endp)
0282eb69 1492 {
3263d5a2 1493 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1494
3cc67a4d 1495 if (c >= 0x100)
3263d5a2 1496 return 2;
0282eb69 1497 }
3263d5a2
KH
1498 return 1;
1499}
05505664 1500
05505664 1501
3263d5a2 1502/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1503
3cc67a4d
KH
1504 CHARSETS is a vector. If Nth element is non-nil, it means the
1505 charset whose id is N is already found.
2e344af3 1506
3263d5a2 1507 It may lookup a translation table TABLE if supplied. */
2e344af3 1508
3263d5a2 1509static void
d5172d4f
PE
1510find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1511 ptrdiff_t nbytes, Lisp_Object charsets,
1512 Lisp_Object table, bool multibyte)
3263d5a2 1513{
dbbb237d 1514 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1515
1516 if (nchars == nbytes)
3263d5a2 1517 {
3cc67a4d
KH
1518 if (multibyte)
1519 ASET (charsets, charset_ascii, Qt);
1520 else
1521 while (ptr < pend)
1522 {
1523 int c = *ptr++;
1524
1525 if (!NILP (table))
1526 c = translate_char (table, c);
200fc949 1527 if (ASCII_CHAR_P (c))
3cc67a4d
KH
1528 ASET (charsets, charset_ascii, Qt);
1529 else
1530 ASET (charsets, charset_eight_bit, Qt);
1531 }
1532 }
1533 else
1534 {
1535 while (ptr < pend)
3263d5a2 1536 {
3cc67a4d
KH
1537 int c = STRING_CHAR_ADVANCE (ptr);
1538 struct charset *charset;
3263d5a2 1539
3cc67a4d
KH
1540 if (!NILP (table))
1541 c = translate_char (table, c);
1542 charset = CHAR_CHARSET (c);
1543 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1544 }
4ed46869 1545 }
4ed46869
KH
1546}
1547
1548DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1549 2, 3, 0,
fdb82f93
PJ
1550 doc: /* Return a list of charsets in the region between BEG and END.
1551BEG and END are buffer positions.
1552Optional arg TABLE if non-nil is a translation table to look up.
1553
fdb82f93
PJ
1554If the current buffer is unibyte, the returned list may contain
1555only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1556 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
4ed46869 1557{
3263d5a2 1558 Lisp_Object charsets;
d311d28c 1559 ptrdiff_t from, from_byte, to, stop, stop_byte;
42ca828e 1560 int i;
4ed46869 1561 Lisp_Object val;
d5172d4f 1562 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
4ed46869
KH
1563
1564 validate_region (&beg, &end);
1565 from = XFASTINT (beg);
1566 stop = to = XFASTINT (end);
6ae1f27e 1567
4ed46869 1568 if (from < GPT && GPT < to)
6ae1f27e
RS
1569 {
1570 stop = GPT;
1571 stop_byte = GPT_BYTE;
1572 }
1573 else
1574 stop_byte = CHAR_TO_BYTE (stop);
1575
1576 from_byte = CHAR_TO_BYTE (from);
1577
3263d5a2 1578 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1579 while (1)
1580 {
3263d5a2 1581 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1582 stop_byte - from_byte, charsets, table,
1583 multibyte);
4ed46869 1584 if (stop < to)
6ae1f27e
RS
1585 {
1586 from = stop, from_byte = stop_byte;
1587 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1588 }
4ed46869
KH
1589 else
1590 break;
1591 }
6ae1f27e 1592
4ed46869 1593 val = Qnil;
3263d5a2 1594 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1595 if (!NILP (AREF (charsets, i)))
3263d5a2 1596 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1597 return val;
1598}
1599
1600DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1601 1, 2, 0,
fdb82f93
PJ
1602 doc: /* Return a list of charsets in STR.
1603Optional arg TABLE if non-nil is a translation table to look up.
1604
fdb82f93 1605If STR is unibyte, the returned list may contain
3263d5a2 1606only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1607 (Lisp_Object str, Lisp_Object table)
4ed46869 1608{
3263d5a2 1609 Lisp_Object charsets;
4ed46869
KH
1610 int i;
1611 Lisp_Object val;
1612
b7826503 1613 CHECK_STRING (str);
87b089ad 1614
3263d5a2 1615 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1616 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1617 charsets, table,
1618 STRING_MULTIBYTE (str));
4ed46869 1619 val = Qnil;
3263d5a2 1620 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1621 if (!NILP (AREF (charsets, i)))
3263d5a2 1622 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1623 return val;
1624}
2e344af3 1625
4ed46869 1626\f
3263d5a2 1627
ecca2aad
KH
1628/* Return a unified character code for C (>= 0x110000). VAL is a
1629 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1630 charset symbol. */
41c8bfcf 1631static int
971de7fb 1632maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1633{
1634 struct charset *charset;
1635
1636 if (INTEGERP (val))
ccd9a01a 1637 return XFASTINT (val);
ecca2aad
KH
1638 if (NILP (val))
1639 return c;
1640
1641 CHECK_CHARSET_GET_CHARSET (val, charset);
291d430f 1642#ifdef REL_ALLOC
efc00ab1 1643 /* The call to load_charset below can allocate memory, which screws
291d430f
EZ
1644 callers of this function through STRING_CHAR_* macros that hold C
1645 pointers to buffer text, if REL_ALLOC is used. */
1646 r_alloc_inhibit_buffer_relocation (1);
1647#endif
ecca2aad
KH
1648 load_charset (charset, 1);
1649 if (! inhibit_load_charset_map)
1650 {
1651 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1652 if (! NILP (val))
ccd9a01a 1653 c = XFASTINT (val);
ecca2aad
KH
1654 }
1655 else
1656 {
1657 int code_index = c - CHARSET_CODE_OFFSET (charset);
1658 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1659
1660 if (unified > 0)
1661 c = unified;
1662 }
291d430f
EZ
1663#ifdef REL_ALLOC
1664 r_alloc_inhibit_buffer_relocation (0);
1665#endif
ecca2aad
KH
1666 return c;
1667}
1668
1669
bbd240ce 1670/* Return a character corresponding to the code-point CODE of
3263d5a2
KH
1671 CHARSET. */
1672
1673int
971de7fb 1674decode_char (struct charset *charset, unsigned int code)
4ed46869 1675{
3263d5a2
KH
1676 int c, char_index;
1677 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1678
3263d5a2
KH
1679 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1680 return -1;
4ed46869 1681
374c5cfd 1682 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1683 {
374c5cfd
KH
1684 Lisp_Object subset_info;
1685
1686 subset_info = CHARSET_SUBSET (charset);
1687 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1688 code -= XINT (AREF (subset_info, 3));
1689 if (code < XFASTINT (AREF (subset_info, 1))
1690 || code > XFASTINT (AREF (subset_info, 2)))
1691 c = -1;
1692 else
1693 c = DECODE_CHAR (charset, code);
2e344af3 1694 }
374c5cfd 1695 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1696 {
3263d5a2 1697 Lisp_Object parents;
4ed46869 1698
374c5cfd 1699 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1700 c = -1;
1701 for (; CONSP (parents); parents = XCDR (parents))
1702 {
1703 int id = XINT (XCAR (XCAR (parents)));
1704 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1705 unsigned this_code = code - code_offset;
4ed46869 1706
3263d5a2
KH
1707 charset = CHARSET_FROM_ID (id);
1708 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1709 break;
1710 }
1711 }
1712 else
ac4137cc 1713 {
3263d5a2 1714 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1715 if (char_index < 0)
1716 return -1;
4ed46869 1717
3263d5a2 1718 if (method == CHARSET_METHOD_MAP)
ac4137cc 1719 {
3263d5a2 1720 Lisp_Object decoder;
4ed46869 1721
3263d5a2
KH
1722 decoder = CHARSET_DECODER (charset);
1723 if (! VECTORP (decoder))
ecca2aad
KH
1724 {
1725 load_charset (charset, 1);
1726 decoder = CHARSET_DECODER (charset);
1727 }
1728 if (VECTORP (decoder))
1729 c = XINT (AREF (decoder, char_index));
1730 else
1731 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1732 }
ecca2aad 1733 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1734 {
3263d5a2 1735 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad 1736 if (CHARSET_UNIFIED_P (charset)
41c8bfcf
PE
1737 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1738 {
1739 /* Unify C with a Unicode character if possible. */
1740 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1741 c = maybe_unify_char (c, val);
1742 }
ac4137cc
KH
1743 }
1744 }
4ed46869 1745
3263d5a2 1746 return c;
90d7b74e
KH
1747}
1748
374c5cfd
KH
1749/* Variable used temporarily by the macro ENCODE_CHAR. */
1750Lisp_Object charset_work;
4ed46869 1751
d5172d4f 1752/* Return a code-point of C in CHARSET. If C doesn't belong to
28c026cd
DL
1753 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1754 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1755
3263d5a2 1756unsigned
971de7fb 1757encode_char (struct charset *charset, int c)
9d3d8cba 1758{
3263d5a2
KH
1759 unsigned code;
1760 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1761
3263d5a2 1762 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1763 {
6809ca75 1764 Lisp_Object deunifier;
ecca2aad 1765 int code_index = -1;
4ed46869 1766
3263d5a2
KH
1767 deunifier = CHARSET_DEUNIFIER (charset);
1768 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1769 {
ecca2aad 1770 load_charset (charset, 2);
3263d5a2 1771 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1772 }
ecca2aad
KH
1773 if (CHAR_TABLE_P (deunifier))
1774 {
1775 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1776
1777 if (INTEGERP (deunified))
1778 code_index = XINT (deunified);
1779 }
1780 else
1781 {
1782 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1783 }
1784 if (code_index >= 0)
1785 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1786 }
9d3d8cba 1787
374c5cfd
KH
1788 if (method == CHARSET_METHOD_SUBSET)
1789 {
1790 Lisp_Object subset_info;
1791 struct charset *this_charset;
1792
1793 subset_info = CHARSET_SUBSET (charset);
1794 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1795 code = ENCODE_CHAR (this_charset, c);
1796 if (code == CHARSET_INVALID_CODE (this_charset)
1797 || code < XFASTINT (AREF (subset_info, 1))
1798 || code > XFASTINT (AREF (subset_info, 2)))
1799 return CHARSET_INVALID_CODE (charset);
1800 code += XINT (AREF (subset_info, 3));
1801 return code;
1802 }
9d3d8cba 1803
374c5cfd 1804 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1805 {
3263d5a2 1806 Lisp_Object parents;
d2665018 1807
374c5cfd 1808 parents = CHARSET_SUPERSET (charset);
3263d5a2 1809 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1810 {
3263d5a2
KH
1811 int id = XINT (XCAR (XCAR (parents)));
1812 int code_offset = XINT (XCDR (XCAR (parents)));
1813 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1814
3263d5a2 1815 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1816 if (code != CHARSET_INVALID_CODE (this_charset))
1817 return code + code_offset;
beeedaad 1818 }
3263d5a2
KH
1819 return CHARSET_INVALID_CODE (charset);
1820 }
1bcc1567 1821
15c85a88
KH
1822 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1823 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1824 return CHARSET_INVALID_CODE (charset);
1bcc1567 1825
3263d5a2 1826 if (method == CHARSET_METHOD_MAP)
3f62427c 1827 {
3263d5a2 1828 Lisp_Object encoder;
beeedaad 1829 Lisp_Object val;
9b6a601f 1830
3263d5a2
KH
1831 encoder = CHARSET_ENCODER (charset);
1832 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1833 {
1834 load_charset (charset, 2);
1835 encoder = CHARSET_ENCODER (charset);
1836 }
1837 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1838 {
1839 val = CHAR_TABLE_REF (encoder, c);
1840 if (NILP (val))
1841 return CHARSET_INVALID_CODE (charset);
1842 code = XINT (val);
1843 if (! CHARSET_COMPACT_CODES_P (charset))
1844 code = INDEX_TO_CODE_POINT (charset, code);
1845 }
1846 else
1847 {
1848 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
51b59d79 1849 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad 1850 }
3263d5a2 1851 }
820ee249 1852 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1853 {
58c8a77d
PE
1854 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1855
1856 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1857 }
8ac5a9cc 1858
3263d5a2 1859 return code;
4ed46869
KH
1860}
1861
4ed46869 1862
3263d5a2
KH
1863DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1864 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1865Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1866
328a8179 1867CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
5842a27b 1868 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
4ed46869 1869{
3263d5a2
KH
1870 int c, id;
1871 unsigned code;
1872 struct charset *charsetp;
859f2b3c 1873
3263d5a2 1874 CHECK_CHARSET_GET_ID (charset, id);
be44ca6c 1875 code = cons_to_unsigned (code_point, UINT_MAX);
3263d5a2
KH
1876 charsetp = CHARSET_FROM_ID (id);
1877 c = DECODE_CHAR (charsetp, code);
1878 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1879}
1880
859f2b3c 1881
3263d5a2
KH
1882DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1883 doc: /* Encode the character CH into a code-point of CHARSET.
328a8179 1884Return nil if CHARSET doesn't include CH. */)
5842a27b 1885 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
4ed46869 1886{
34206dd2 1887 int c, id;
3263d5a2
KH
1888 unsigned code;
1889 struct charset *charsetp;
046b1f03 1890
3263d5a2 1891 CHECK_CHARSET_GET_ID (charset, id);
34206dd2
PE
1892 CHECK_CHARACTER (ch);
1893 c = XFASTINT (ch);
3263d5a2 1894 charsetp = CHARSET_FROM_ID (id);
34206dd2 1895 code = ENCODE_CHAR (charsetp, c);
3263d5a2
KH
1896 if (code == CHARSET_INVALID_CODE (charsetp))
1897 return Qnil;
be44ca6c 1898 return INTEGER_TO_CONS (code);
beeedaad
KH
1899}
1900
beeedaad 1901
b121a744
KH
1902DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1903 doc:
1904 /* Return a character of CHARSET whose position codes are CODEn.
1905
1906CODE1 through CODE4 are optional, but if you don't supply sufficient
1907position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1908is specified. */)
5842a27b 1909 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
beeedaad 1910{
3263d5a2
KH
1911 int id, dimension;
1912 struct charset *charsetp;
b121a744
KH
1913 unsigned code;
1914 int c;
87b089ad 1915
3263d5a2
KH
1916 CHECK_CHARSET_GET_ID (charset, id);
1917 charsetp = CHARSET_FROM_ID (id);
4ed46869 1918
b121a744
KH
1919 dimension = CHARSET_DIMENSION (charsetp);
1920 if (NILP (code1))
d47073ca
KH
1921 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1922 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1923 else
859f2b3c 1924 {
b121a744
KH
1925 CHECK_NATNUM (code1);
1926 if (XFASTINT (code1) >= 0x100)
1927 args_out_of_range (make_number (0xFF), code1);
1928 code = XFASTINT (code1);
859f2b3c 1929
b0a1e45e 1930 if (dimension > 1)
beeedaad 1931 {
b121a744 1932 code <<= 8;
b0a1e45e
KH
1933 if (NILP (code2))
1934 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1935 else
b121a744 1936 {
b0a1e45e
KH
1937 CHECK_NATNUM (code2);
1938 if (XFASTINT (code2) >= 0x100)
1939 args_out_of_range (make_number (0xFF), code2);
1940 code |= XFASTINT (code2);
b121a744 1941 }
99529c2c 1942
b0a1e45e 1943 if (dimension > 2)
b121a744
KH
1944 {
1945 code <<= 8;
b0a1e45e
KH
1946 if (NILP (code3))
1947 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1948 else
1949 {
b0a1e45e
KH
1950 CHECK_NATNUM (code3);
1951 if (XFASTINT (code3) >= 0x100)
1952 args_out_of_range (make_number (0xFF), code3);
1953 code |= XFASTINT (code3);
1954 }
1955
1956 if (dimension > 3)
1957 {
1958 code <<= 8;
1959 if (NILP (code4))
1960 code |= charsetp->code_space[0];
1961 else
1962 {
1963 CHECK_NATNUM (code4);
1964 if (XFASTINT (code4) >= 0x100)
1965 args_out_of_range (make_number (0xFF), code4);
1966 code |= XFASTINT (code4);
1967 }
b121a744
KH
1968 }
1969 }
beeedaad 1970 }
859f2b3c 1971 }
beeedaad 1972
b121a744
KH
1973 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1974 code &= 0x7F7F7F7F;
1975 c = DECODE_CHAR (charsetp, code);
1976 if (c < 0)
1977 error ("Invalid code(s)");
3263d5a2 1978 return make_number (c);
4ed46869
KH
1979}
1980
beeedaad 1981
3263d5a2
KH
1982/* Return the first charset in CHARSET_LIST that contains C.
1983 CHARSET_LIST is a list of charset IDs. If it is nil, use
1984 Vcharset_ordered_list. */
beeedaad 1985
3263d5a2 1986struct charset *
971de7fb 1987char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 1988{
d5172d4f 1989 bool maybe_null = 0;
8a1816bb 1990
3263d5a2
KH
1991 if (NILP (charset_list))
1992 charset_list = Vcharset_ordered_list;
8a1816bb
KH
1993 else
1994 maybe_null = 1;
beeedaad 1995
6c652beb 1996 while (CONSP (charset_list))
2e344af3 1997 {
3263d5a2
KH
1998 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1999 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2000
3263d5a2 2001 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2002 {
3263d5a2
KH
2003 if (code_return)
2004 *code_return = code;
2005 return charset;
3f62427c 2006 }
3263d5a2 2007 charset_list = XCDR (charset_list);
c0be27fd
KH
2008 if (! maybe_null
2009 && c <= MAX_UNICODE_CHAR
2010 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 2011 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2012 }
8a1816bb
KH
2013 return (maybe_null ? NULL
2014 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2015 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2016}
2017
2e344af3 2018
3263d5a2 2019DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2020 doc:
d0cf2d48 2021 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2022The charset is decided by the current priority order of charsets.
2023A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2024CH in the charset. */)
5842a27b 2025 (Lisp_Object ch)
4ed46869 2026{
3263d5a2
KH
2027 struct charset *charset;
2028 int c, dimension;
2029 unsigned code;
4ed46869
KH
2030 Lisp_Object val;
2031
3263d5a2
KH
2032 CHECK_CHARACTER (ch);
2033 c = XFASTINT (ch);
2034 charset = CHAR_CHARSET (c);
2035 if (! charset)
1088b922 2036 emacs_abort ();
3263d5a2
KH
2037 code = ENCODE_CHAR (charset, c);
2038 if (code == CHARSET_INVALID_CODE (charset))
1088b922 2039 emacs_abort ();
3263d5a2 2040 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2041 for (val = Qnil; dimension > 0; dimension--)
2042 {
2043 val = Fcons (make_number (code & 0xFF), val);
2044 code >>= 8;
2045 }
3263d5a2 2046 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2047}
2048
740f080d 2049
4cb75c4b
KH
2050DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2051 doc: /* Return the charset of highest priority that contains CH.
15c6d837
XF
2052ASCII characters are an exception: for them, this function always
2053returns `ascii'.
4cb75c4b
KH
2054If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2055from which to find the charset. It may also be a coding system. In
2056that case, find the charset from what supported by that coding system. */)
5842a27b 2057 (Lisp_Object ch, Lisp_Object restriction)
4ed46869 2058{
3263d5a2 2059 struct charset *charset;
4ed46869 2060
3263d5a2 2061 CHECK_CHARACTER (ch);
4cb75c4b
KH
2062 if (NILP (restriction))
2063 charset = CHAR_CHARSET (XINT (ch));
2064 else
2065 {
4cb75c4b
KH
2066 if (CONSP (restriction))
2067 {
7a84eee5
KH
2068 int c = XFASTINT (ch);
2069
2070 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2071 {
f6095868 2072 struct charset *rcharset;
4cb75c4b 2073
f6095868
PE
2074 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2075 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
7a84eee5 2076 return XCAR (restriction);
4cb75c4b 2077 }
7a84eee5 2078 return Qnil;
4cb75c4b 2079 }
7a84eee5
KH
2080 restriction = coding_system_charset_list (restriction);
2081 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2082 if (! charset)
2083 return Qnil;
2084 }
3263d5a2 2085 return (CHARSET_NAME (charset));
4ed46869
KH
2086}
2087
17e7ef1b 2088
3263d5a2
KH
2089DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2090 doc: /*
2091Return charset of a character in the current buffer at position POS.
cd1181db 2092If POS is nil, it defaults to the current point.
3263d5a2 2093If POS is out of range, the value is nil. */)
5842a27b 2094 (Lisp_Object pos)
2e344af3 2095{
3263d5a2
KH
2096 Lisp_Object ch;
2097 struct charset *charset;
046b1f03 2098
3263d5a2
KH
2099 ch = Fchar_after (pos);
2100 if (! INTEGERP (ch))
2101 return ch;
2102 charset = CHAR_CHARSET (XINT (ch));
2103 return (CHARSET_NAME (charset));
6ae1f27e 2104}
9036eb45 2105
87b089ad 2106
3263d5a2
KH
2107DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2108 doc: /*
2109Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2110
2111ISO 2022's designation sequence (escape sequence) distinguishes charsets
2112by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2113whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2114See the documentation of the function `charset-info' for the meanings of
2115DIMENSION, CHARS, and FINAL-CHAR. */)
5842a27b 2116 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
6ae1f27e 2117{
3263d5a2 2118 int id;
d5172d4f 2119 bool chars_flag;
a8a35e61 2120
3263d5a2 2121 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2122 chars_flag = XFASTINT (chars) == 96;
2123 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2124 XFASTINT (final_char));
2125 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2126}
2127
87b089ad 2128
3263d5a2
KH
2129DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2130 0, 0, 0,
2131 doc: /*
ecca2aad
KH
2132Internal use only.
2133Clear temporary charset mapping tables.
2134It should be called only from temacs invoked for dumping. */)
5842a27b 2135 (void)
87b089ad 2136{
ecca2aad 2137 if (temp_charset_work)
87b089ad 2138 {
baad03f0 2139 xfree (temp_charset_work);
ecca2aad 2140 temp_charset_work = NULL;
2e344af3 2141 }
2e344af3 2142
ecca2aad
KH
2143 if (CHAR_TABLE_P (Vchar_unify_table))
2144 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2145
3263d5a2 2146 return Qnil;
740f080d
KH
2147}
2148
8ddf5e57
DL
2149DEFUN ("charset-priority-list", Fcharset_priority_list,
2150 Scharset_priority_list, 0, 1, 0,
2151 doc: /* Return the list of charsets ordered by priority.
2152HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 2153 (Lisp_Object highestp)
2e344af3 2154{
8ddf5e57 2155 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2156
8ddf5e57 2157 if (!NILP (highestp))
16fed1fc 2158 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2159
8ddf5e57 2160 while (!NILP (list))
2e344af3 2161 {
16fed1fc 2162 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2163 list = XCDR (list);
2e344af3 2164 }
8ddf5e57 2165 return Fnreverse (val);
2e344af3
KH
2166}
2167
8ddf5e57
DL
2168DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2169 1, MANY, 0,
2170 doc: /* Assign higher priority to the charsets given as arguments.
2171usage: (set-charset-priority &rest charsets) */)
f66c7cf8 2172 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 2173{
af7c60ca 2174 Lisp_Object new_head, old_list, arglist[2];
321c819c 2175 Lisp_Object list_2022, list_emacs_mule;
f66c7cf8 2176 ptrdiff_t i;
c5101a77 2177 int id;
4ed46869 2178
8ddf5e57 2179 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2180 new_head = Qnil;
8ddf5e57 2181 for (i = 0; i < nargs; i++)
4ed46869 2182 {
8ddf5e57 2183 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2184 if (! NILP (Fmemq (make_number (id), old_list)))
2185 {
2186 old_list = Fdelq (make_number (id), old_list);
2187 new_head = Fcons (make_number (id), new_head);
2188 }
5729c92f 2189 }
8ddf5e57 2190 arglist[0] = Fnreverse (new_head);
6a9c90ec 2191 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2192 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2193 charset_ordered_list_tick++;
5729c92f 2194
6809ca75 2195 charset_unibyte = -1;
321c819c 2196 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2197 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2198 {
e77415b0 2199 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2200 list_2022 = Fcons (XCAR (old_list), list_2022);
2201 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2202 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2203 if (charset_unibyte < 0)
2204 {
2205 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2206
2207 if (CHARSET_DIMENSION (charset) == 1
2208 && CHARSET_ASCII_COMPATIBLE_P (charset)
2209 && CHARSET_MAX_CHAR (charset) >= 0x80)
2210 charset_unibyte = CHARSET_ID (charset);
2211 }
4ed46869 2212 }
321c819c
KH
2213 Viso_2022_charset_list = Fnreverse (list_2022);
2214 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2215 if (charset_unibyte < 0)
2216 charset_unibyte = charset_iso_8859_1;
4ed46869 2217
8ddf5e57 2218 return Qnil;
4ed46869
KH
2219}
2220
d5b33309
KH
2221DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2222 0, 1, 0,
2223 doc: /* Internal use only.
2224Return charset identification number of CHARSET. */)
5842a27b 2225 (Lisp_Object charset)
4ed46869 2226{
d5b33309 2227 int id;
4ed46869 2228
d5b33309
KH
2229 CHECK_CHARSET_GET_ID (charset, id);
2230 return make_number (id);
4ed46869
KH
2231}
2232
7a84eee5
KH
2233struct charset_sort_data
2234{
2235 Lisp_Object charset;
2236 int id;
0065d054 2237 ptrdiff_t priority;
7a84eee5
KH
2238};
2239
2240static int
2241charset_compare (const void *d1, const void *d2)
2242{
2243 const struct charset_sort_data *data1 = d1, *data2 = d2;
0065d054
PE
2244 if (data1->priority != data2->priority)
2245 return data1->priority < data2->priority ? -1 : 1;
2246 return 0;
7a84eee5
KH
2247}
2248
2249DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2250 doc: /* Sort charset list CHARSETS by a priority of each charset.
2251Return the sorted list. CHARSETS is modified by side effects.
2252See also `charset-priority-list' and `set-charset-priority'. */)
2253 (Lisp_Object charsets)
2254{
2255 Lisp_Object len = Flength (charsets);
0065d054
PE
2256 ptrdiff_t n = XFASTINT (len), i, j;
2257 int done;
7a84eee5
KH
2258 Lisp_Object tail, elt, attrs;
2259 struct charset_sort_data *sort_data;
726929c4 2260 int id, min_id = INT_MAX, max_id = INT_MIN;
7a84eee5
KH
2261 USE_SAFE_ALLOCA;
2262
2263 if (n == 0)
2264 return Qnil;
0065d054 2265 SAFE_NALLOCA (sort_data, 1, n);
7a84eee5
KH
2266 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2267 {
2268 elt = XCAR (tail);
2269 CHECK_CHARSET_GET_ATTR (elt, attrs);
2270 sort_data[i].charset = elt;
2271 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
726929c4 2272 if (id < min_id)
7a84eee5 2273 min_id = id;
726929c4 2274 if (id > max_id)
7a84eee5
KH
2275 max_id = id;
2276 }
2277 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2278 done < n && CONSP (tail); tail = XCDR (tail), i++)
2279 {
2280 elt = XCAR (tail);
2281 id = XFASTINT (elt);
2282 if (id >= min_id && id <= max_id)
2283 for (j = 0; j < n; j++)
2284 if (sort_data[j].id == id)
2285 {
2286 sort_data[j].priority = i;
2287 done++;
2288 }
2289 }
2290 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2291 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2292 XSETCAR (tail, sort_data[i].charset);
2293 SAFE_FREE ();
2294 return charsets;
2295}
2296
4ed46869 2297\f
3263d5a2 2298void
971de7fb 2299init_charset (void)
4ed46869 2300{
c8f94403
GM
2301 Lisp_Object tempdir;
2302 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
73dcdb9f 2303 if (! file_accessible_directory_p (SSDATA (tempdir)))
c8f94403 2304 {
63541ed8
GM
2305 /* This used to be non-fatal (dir_warning), but it should not
2306 happen, and if it does sooner or later it will cause some
16905bec 2307 obscure problem (eg bug#6401), so better abort. */
63541ed8
GM
2308 fprintf (stderr, "Error: charsets directory not found:\n\
2309%s\n\
7b9cb544 2310Emacs will not function correctly without the character map files.\n%s\
c8f94403 2311Please check your installation!\n",
7b9cb544
GM
2312 SDATA (tempdir),
2313 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2314variable is set, maybe it has the wrong value?\n" : "");
63541ed8 2315 exit (1);
c8f94403
GM
2316 }
2317
6c6f1994 2318 Vcharset_map_path = list1 (tempdir);
4ed46869
KH
2319}
2320
4ed46869 2321
dfcf069d 2322void
971de7fb 2323init_charset_once (void)
4ed46869
KH
2324{
2325 int i, j, k;
2326
3263d5a2
KH
2327 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2328 for (j = 0; j < ISO_MAX_CHARS; j++)
2329 for (k = 0; k < ISO_MAX_FINAL; k++)
2330 iso_charset_table[i][j][k] = -1;
4ed46869 2331
60383934 2332 for (i = 0; i < 256; i++)
b84ae584 2333 emacs_mule_charset[i] = -1;
4ed46869 2334
7c7dceee
KH
2335 charset_jisx0201_roman = -1;
2336 charset_jisx0208_1978 = -1;
2337 charset_jisx0208 = -1;
d32320c4 2338 charset_ksc5601 = -1;
4ed46869
KH
2339}
2340
2341#ifdef emacs
2342
f701dc2a
PE
2343/* Allocate an initial charset table that is large enough to handle
2344 Emacs while it is bootstrapping. As of September 2011, the size
2345 needs to be at least 166; make it a bit bigger to allow for future
2346 expansion.
2347
2348 Don't make the value so small that the table is reallocated during
2349 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2350 during an initial bootstrap wreak havoc after dumping; see the
2351 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2352 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2353static struct charset charset_table_init[180];
2354
dfcf069d 2355void
971de7fb 2356syms_of_charset (void)
4ed46869 2357{
fe6aa7a1
BT
2358#include "charset.x"
2359
3263d5a2
KH
2360 DEFSYM (Qcharsetp, "charsetp");
2361
2362 DEFSYM (Qascii, "ascii");
2363 DEFSYM (Qunicode, "unicode");
6c652beb 2364 DEFSYM (Qemacs, "emacs");
2fe1edd1 2365 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2366 DEFSYM (Qiso_8859_1, "iso-8859-1");
2367
2368 DEFSYM (Qgl, "gl");
2369 DEFSYM (Qgr, "gr");
2370
3263d5a2
KH
2371 staticpro (&Vcharset_ordered_list);
2372 Vcharset_ordered_list = Qnil;
2373
2374 staticpro (&Viso_2022_charset_list);
2375 Viso_2022_charset_list = Qnil;
2376
2377 staticpro (&Vemacs_mule_charset_list);
2378 Vemacs_mule_charset_list = Qnil;
2379
3943ed76 2380 /* Don't staticpro them here. It's done in syms_of_fns. */
088dcc3e
DN
2381 QCtest = intern_c_string (":test");
2382 Qeq = intern_c_string ("eq");
3943ed76 2383
3263d5a2 2384 staticpro (&Vcharset_hash_table);
8f924df7
KH
2385 {
2386 Lisp_Object args[2];
2387 args[0] = QCtest;
2388 args[1] = Qeq;
2389 Vcharset_hash_table = Fmake_hash_table (2, args);
2390 }
3263d5a2 2391
f701dc2a 2392 charset_table = charset_table_init;
faa52174 2393 charset_table_size = ARRAYELTS (charset_table_init);
3263d5a2
KH
2394 charset_table_used = 0;
2395
29208e82 2396 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
fb7ada5f 2397 doc: /* List of directories to search for charset map files. */);
4beef065 2398 Vcharset_map_path = Qnil;
4ed46869 2399
29208e82 2400 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
ecca2aad
KH
2401 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2402 inhibit_load_charset_map = 0;
2403
29208e82 2404 DEFVAR_LISP ("charset-list", Vcharset_list,
528623a0 2405 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2406 Vcharset_list = Qnil;
2407
29208e82 2408 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
6a9c90ec
KH
2409 doc: /* ISO639 language mnemonic symbol for the current language environment.
2410If the current language environment is for multiple languages (e.g. "Latin-1"),
2411the value may be a list of mnemonics. */);
2412 Vcurrent_iso639_language = Qnil;
2413
2fe1edd1 2414 charset_ascii
12455b2f 2415 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2fe1edd1 2416 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523 2417 charset_iso_8859_1
12455b2f 2418 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
14e3d523 2419 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2420 charset_unicode
12455b2f 2421 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2fe1edd1 2422 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb 2423 charset_emacs
12455b2f 2424 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
6c652beb 2425 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1 2426 charset_eight_bit
12455b2f 2427 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
880820fe 2428 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2429 MAX_5_BYTE_CHAR + 1);
6809ca75 2430 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2431}
2432
2433#endif /* emacs */