duplicate current dir name in smc_save_yourself_CB
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
4ed46869 2
ba318903 3Copyright (C) 2001-2014 Free Software Foundation, Inc.
1f41ee56
GM
4
5Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
9
10Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
4ed46869 13
369314dc
KH
14This file is part of GNU Emacs.
15
9ec0b715 16GNU Emacs is free software: you can redistribute it and/or modify
369314dc 17it under the terms of the GNU General Public License as published by
9ec0b715
GM
18the Free Software Foundation, either version 3 of the License, or
19(at your option) any later version.
4ed46869 20
369314dc
KH
21GNU Emacs is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24GNU General Public License for more details.
4ed46869 25
369314dc 26You should have received a copy of the GNU General Public License
9ec0b715 27along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 28
68c45bf0 29#include <config.h>
68c45bf0 30
f4b1eb36 31#include <errno.h>
4ed46869 32#include <stdio.h>
3263d5a2 33#include <unistd.h>
726929c4 34#include <limits.h>
4ed46869 35#include <sys/types.h>
620f13b0 36#include <c-ctype.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869 43
04c2f2c5 44/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 45
3263d5a2 46 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 47 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
4ed46869 51
3263d5a2
KH
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
04c2f2c5 55 charset_table as a struct charset.
4ed46869 56
3263d5a2 57*/
4ed46869 58
3263d5a2
KH
59/* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61Lisp_Object Vcharset_hash_table;
4ed46869 62
3263d5a2
KH
63/* Table of struct charset. */
64struct charset *charset_table;
4ed46869 65
0065d054 66static ptrdiff_t charset_table_size;
5af5dd92 67static int charset_table_used;
4ed46869 68
3263d5a2 69Lisp_Object Qcharsetp;
4ed46869 70
3263d5a2
KH
71/* Special charset symbols. */
72Lisp_Object Qascii;
955cbe7b
PE
73static Lisp_Object Qeight_bit;
74static Lisp_Object Qiso_8859_1;
75static Lisp_Object Qunicode;
76static Lisp_Object Qemacs;
4ed46869 77
3263d5a2
KH
78/* The corresponding charsets. */
79int charset_ascii;
2fe1edd1 80int charset_eight_bit;
a2cb4e63 81static int charset_iso_8859_1;
3263d5a2 82int charset_unicode;
a2cb4e63 83static int charset_emacs;
b0e3cf2b 84
7c7dceee
KH
85/* The other special charsets. */
86int charset_jisx0201_roman;
87int charset_jisx0208_1978;
88int charset_jisx0208;
d32320c4 89int charset_ksc5601;
c1a08b4c 90
3263d5a2 91/* Value of charset attribute `charset-iso-plane'. */
955cbe7b 92static Lisp_Object Qgl, Qgr;
c1a08b4c 93
d1a04588
KH
94/* Charset of unibyte characters. */
95int charset_unibyte;
4ed46869 96
3263d5a2
KH
97/* List of charsets ordered by the priority. */
98Lisp_Object Vcharset_ordered_list;
4ed46869 99
6a9c90ec
KH
100/* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102Lisp_Object Vcharset_non_preferred_head;
103
dbbb237d 104/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 105 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
106 -1. */
107unsigned short charset_ordered_list_tick;
4ed46869 108
3263d5a2
KH
109/* List of iso-2022 charsets. */
110Lisp_Object Viso_2022_charset_list;
35e623fb 111
3263d5a2
KH
112/* List of emacs-mule charsets. */
113Lisp_Object Vemacs_mule_charset_list;
114
b84ae584 115int emacs_mule_charset[256];
4ed46869
KH
116
117/* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
119int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
69f8de5b
KH
121#define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
3c7649c1 123 ? (int) ((code) - (charset)->min_code) \
69f8de5b
KH
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
3c7649c1
PE
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
3263d5a2
KH
136 : -1)
137
138
3c7649c1
PE
139/* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
3263d5a2 142
820ee249
KH
143#define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
150 << 8) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
153 << 16) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
155 << 24))))
4cf9710d 156
ecca2aad
KH
157/* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
8a73a704 159
ecca2aad
KH
160static struct
161{
162 /* The current charset for which the following tables are setup. */
163 struct charset *current;
164
165 /* 1 iff the following table is used for encoder. */
166 short for_encoder;
167
0b381c7e
JB
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
ecca2aad
KH
170 int min_char, max_char;
171
e4769531 172 /* A Unicode character corresponding to the code index 0 (i.e. the
ecca2aad 173 minimum code-point) of the current charset, or -1 if the code
e4769531 174 index 0 is not a Unicode character. This is checked when
ecca2aad
KH
175 table.encoder[CHAR] is zero. */
176 int zero_index_char;
177
178 union {
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder[0x20000];
189 } table;
190} *temp_charset_work;
191
192#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
193 do { \
194 if ((CODE) == 0) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
198 else \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
200 } while (0)
201
202#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
208
209#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
211
212#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
046b1f03 214\f
93bcb785 215
e9ce014c
KH
216/* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
d5172d4f 218bool charset_map_loaded;
4ed46869 219
e9ce014c 220struct charset_map_entries
4ed46869 221{
e9ce014c
KH
222 struct {
223 unsigned from, to;
224 int c;
225 } entry[0x10000];
226 struct charset_map_entries *next;
227};
228
ecca2aad
KH
229/* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
232
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
235
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
4cf9710d 238
ecca2aad
KH
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 244
ecca2aad 245 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 246
ecca2aad
KH
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
252*/
4ed46869 253
3263d5a2 254static void
971de7fb 255load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 256{
89ef49df 257 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
3263d5a2 258 unsigned max_code = CHARSET_MAX_CODE (charset);
d5172d4f 259 bool ascii_compatible_p = charset->ascii_compatible_p;
3263d5a2 260 int min_char, max_char, nonascii_min_char;
3263d5a2 261 int i;
3263d5a2 262 unsigned char *fast_map = charset->fast_map;
99529c2c 263
e9ce014c
KH
264 if (n_entries <= 0)
265 return;
266
ecca2aad 267 if (control_flag)
8ac5a9cc 268 {
ecca2aad
KH
269 if (! inhibit_load_charset_map)
270 {
271 if (control_flag == 1)
272 {
273 if (charset->method == CHARSET_METHOD_MAP)
274 {
275 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 276
4939150c
PE
277 vec = Fmake_vector (make_number (n), make_number (-1));
278 set_charset_attr (charset, charset_decoder, vec);
ecca2aad
KH
279 }
280 else
281 {
282 char_table_set_range (Vchar_unify_table,
283 charset->min_char, charset->max_char,
284 Qnil);
285 }
286 }
287 else
288 {
289 table = Fmake_char_table (Qnil, Qnil);
4939150c
PE
290 set_charset_attr (charset,
291 (charset->method == CHARSET_METHOD_MAP
292 ? charset_encoder : charset_deunifier),
293 table);
ecca2aad
KH
294 }
295 }
296 else
297 {
298 if (! temp_charset_work)
38182d90 299 temp_charset_work = xmalloc (sizeof *temp_charset_work);
ecca2aad
KH
300 if (control_flag == 1)
301 {
302 memset (temp_charset_work->table.decoder, -1,
303 sizeof (int) * 0x10000);
ecca2aad
KH
304 }
305 else
306 {
307 memset (temp_charset_work->table.encoder, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work->zero_index_char = -1;
310 }
311 temp_charset_work->current = charset;
312 temp_charset_work->for_encoder = (control_flag == 2);
313 control_flag += 2;
314 }
3263d5a2 315 charset_map_loaded = 1;
2e344af3 316 }
6662e69b 317
e9ce014c 318 min_char = max_char = entries->entry[0].c;
3263d5a2 319 nonascii_min_char = MAX_CHAR;
e9ce014c 320 for (i = 0; i < n_entries; i++)
2e344af3 321 {
e9ce014c 322 unsigned from, to;
3befa583 323 int from_index, to_index, lim_index;
3b4f4446 324 int from_c, to_c;
e9ce014c 325 int idx = i % 0x10000;
3263d5a2 326
e9ce014c
KH
327 if (i > 0 && idx == 0)
328 entries = entries->next;
329 from = entries->entry[idx].from;
330 to = entries->entry[idx].to;
3b4f4446
KH
331 from_c = entries->entry[idx].c;
332 from_index = CODE_POINT_TO_INDEX (charset, from);
333 if (from == to)
6662e69b 334 {
3b4f4446
KH
335 to_index = from_index;
336 to_c = from_c;
6662e69b 337 }
3b4f4446 338 else
6662e69b 339 {
3b4f4446
KH
340 to_index = CODE_POINT_TO_INDEX (charset, to);
341 to_c = from_c + (to_index - from_index);
6662e69b 342 }
3b4f4446
KH
343 if (from_index < 0 || to_index < 0)
344 continue;
3befa583 345 lim_index = to_index + 1;
3263d5a2 346
ecca2aad
KH
347 if (to_c > max_char)
348 max_char = to_c;
349 else if (from_c < min_char)
350 min_char = from_c;
6662e69b 351
ecca2aad
KH
352 if (control_flag == 1)
353 {
354 if (charset->method == CHARSET_METHOD_MAP)
3befa583 355 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
356 ASET (vec, from_index, make_number (from_c));
357 else
3befa583 358 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
359 CHAR_TABLE_SET (Vchar_unify_table,
360 CHARSET_CODE_OFFSET (charset) + from_index,
361 make_number (from_c));
362 }
363 else if (control_flag == 2)
364 {
365 if (charset->method == CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset))
3befa583 367 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad 368 {
3c7649c1
PE
369 unsigned code = from_index;
370 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad
KH
371
372 if (NILP (CHAR_TABLE_REF (table, from_c)))
373 CHAR_TABLE_SET (table, from_c, make_number (code));
374 }
375 else
3befa583 376 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
377 {
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (from_index));
380 }
381 }
382 else if (control_flag == 3)
3befa583 383 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
384 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
385 else if (control_flag == 4)
3befa583 386 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
388 else /* control_flag == 0 */
389 {
3b4f4446
KH
390 if (ascii_compatible_p)
391 {
200fc949 392 if (! ASCII_CHAR_P (from_c))
3b4f4446
KH
393 {
394 if (from_c < nonascii_min_char)
395 nonascii_min_char = from_c;
396 }
200fc949 397 else if (! ASCII_CHAR_P (to_c))
3b4f4446
KH
398 {
399 nonascii_min_char = 0x80;
400 }
401 }
177c0ea7 402
ecca2aad
KH
403 for (; from_c <= to_c; from_c++)
404 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 405 }
8ac5a9cc 406 }
3263d5a2 407
ecca2aad 408 if (control_flag == 0)
4ed46869 409 {
3263d5a2
KH
410 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
411 ? nonascii_min_char : min_char);
412 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 413 }
ecca2aad
KH
414 else if (control_flag == 4)
415 {
416 temp_charset_work->min_char = min_char;
417 temp_charset_work->max_char = max_char;
418 }
4ed46869
KH
419}
420
12bcae05 421
3263d5a2 422/* Read a hexadecimal number (preceded by "0x") from the file FP while
3b59c351 423 paying attention to comment character '#'. */
12bcae05 424
b0ab8123 425static unsigned
d5172d4f 426read_hex (FILE *fp, bool *eof, bool *overflow)
12bcae05 427{
3263d5a2
KH
428 int c;
429 unsigned n;
12bcae05 430
3263d5a2
KH
431 while ((c = getc (fp)) != EOF)
432 {
69f8de5b 433 if (c == '#')
3263d5a2
KH
434 {
435 while ((c = getc (fp)) != EOF && c != '\n');
436 }
437 else if (c == '0')
438 {
439 if ((c = getc (fp)) == EOF || c == 'x')
440 break;
441 }
8f924df7 442 }
3263d5a2
KH
443 if (c == EOF)
444 {
445 *eof = 1;
446 return 0;
447 }
3263d5a2 448 n = 0;
620f13b0 449 while (c_isxdigit (c = getc (fp)))
caeeedc1
PE
450 {
451 if (UINT_MAX >> 4 < n)
452 *overflow = 1;
3263d5a2 453 n = ((n << 4)
caeeedc1
PE
454 | (c - ('0' <= c && c <= '9' ? '0'
455 : 'A' <= c && c <= 'F' ? 'A' - 10
456 : 'a' - 10)));
457 }
e9ce014c
KH
458 if (c != EOF)
459 ungetc (c, fp);
3263d5a2
KH
460 return n;
461}
12bcae05 462
3263d5a2 463/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
464 Each line of MAPFILE has this form
465 0xAAAA 0xCCCC
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
468 0xAAAA-0xBBBB 0xCCCC
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
4ed46869 471
3263d5a2
KH
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 474 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
475 range.
476
18a10a21
JB
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 479
e9ce014c 480static void
f4b1eb36
PE
481load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
482 int control_flag)
4ed46869 483{
e9ce014c
KH
484 unsigned min_code = CHARSET_MIN_CODE (charset);
485 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
486 int fd;
487 FILE *fp;
3263d5a2 488 Lisp_Object suffixes;
e9ce014c 489 struct charset_map_entries *head, *entries;
d311d28c
PE
490 int n_entries;
491 ptrdiff_t count;
4ed46869 492
6c6f1994 493 suffixes = list2 (build_string (".map"), build_string (".TXT"));
4ed46869 494
a2f3eb19 495 count = SPECPDL_INDEX ();
f4b1eb36 496 record_unwind_protect_nothing ();
dde2559c 497 specbind (Qfile_name_handler_alist, Qnil);
518c40a2 498 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
f4b1eb36
PE
499 fp = fd < 0 ? 0 : fdopen (fd, "r");
500 if (!fp)
501 {
502 int open_errno = errno;
503 emacs_close (fd);
504 report_file_errno ("Loading charset map", mapfile, open_errno);
505 }
506 set_unwind_protect_ptr (count, fclose_unwind, fp);
507 unbind_to (count + 1, Qnil);
4ed46869 508
f4b1eb36 509 /* Use record_xmalloc, as `charset_map_entries' is
a2f3eb19 510 large (larger than MAX_ALLOCA). */
f4b1eb36 511 head = record_xmalloc (sizeof *head);
a2f3eb19 512 entries = head;
72af86bd 513 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 514
e9ce014c 515 n_entries = 0;
3263d5a2
KH
516 while (1)
517 {
caeeedc1 518 unsigned from, to, c;
e9ce014c 519 int idx;
d5172d4f 520 bool eof = 0, overflow = 0;
4ed46869 521
caeeedc1 522 from = read_hex (fp, &eof, &overflow);
3263d5a2
KH
523 if (eof)
524 break;
e9ce014c 525 if (getc (fp) == '-')
caeeedc1 526 to = read_hex (fp, &eof, &overflow);
e9ce014c
KH
527 else
528 to = from;
caeeedc1
PE
529 if (eof)
530 break;
531 c = read_hex (fp, &eof, &overflow);
532 if (eof)
533 break;
ac4137cc 534
caeeedc1
PE
535 if (overflow)
536 continue;
e9ce014c
KH
537 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
538 continue;
ac4137cc 539
f4b1eb36 540 if (n_entries == 0x10000)
3263d5a2 541 {
f4b1eb36 542 entries->next = record_xmalloc (sizeof *entries->next);
e9ce014c 543 entries = entries->next;
72af86bd 544 memset (entries, 0, sizeof (struct charset_map_entries));
d311d28c 545 n_entries = 0;
3263d5a2 546 }
d311d28c 547 idx = n_entries;
e9ce014c
KH
548 entries->entry[idx].from = from;
549 entries->entry[idx].to = to;
550 entries->entry[idx].c = c;
551 n_entries++;
3263d5a2
KH
552 }
553 fclose (fp);
f4b1eb36 554 clear_unwind_protect (count);
177c0ea7 555
e9ce014c 556 load_charset_map (charset, head, n_entries, control_flag);
f4b1eb36 557 unbind_to (count, Qnil);
4ed46869
KH
558}
559
e9ce014c 560static void
971de7fb 561load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 562{
e9ce014c
KH
563 unsigned min_code = CHARSET_MIN_CODE (charset);
564 unsigned max_code = CHARSET_MAX_CODE (charset);
565 struct charset_map_entries *head, *entries;
566 int n_entries;
567 int len = ASIZE (vec);
568 int i;
a2f3eb19 569 USE_SAFE_ALLOCA;
23d2a7f1 570
e9ce014c 571 if (len % 2 == 1)
3263d5a2 572 {
e9ce014c
KH
573 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
574 return;
3263d5a2 575 }
35e623fb 576
a2f3eb19
CY
577 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
578 large (larger than MAX_ALLOCA). */
98c6f1e3 579 head = SAFE_ALLOCA (sizeof *head);
a2f3eb19 580 entries = head;
72af86bd 581 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 582
e9ce014c
KH
583 n_entries = 0;
584 for (i = 0; i < len; i += 2)
35e623fb 585 {
e9ce014c
KH
586 Lisp_Object val, val2;
587 unsigned from, to;
d311d28c 588 EMACS_INT c;
e9ce014c 589 int idx;
d2665018 590
e9ce014c
KH
591 val = AREF (vec, i);
592 if (CONSP (val))
bbf12bb3 593 {
e9ce014c
KH
594 val2 = XCDR (val);
595 val = XCAR (val);
e9ce014c
KH
596 from = XFASTINT (val);
597 to = XFASTINT (val2);
bbf12bb3 598 }
e9ce014c 599 else
d311d28c 600 from = to = XFASTINT (val);
e9ce014c
KH
601 val = AREF (vec, i + 1);
602 CHECK_NATNUM (val);
603 c = XFASTINT (val);
76d7b829 604
e9ce014c
KH
605 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
606 continue;
76d7b829 607
dcc694d7 608 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 609 {
98c6f1e3 610 entries->next = SAFE_ALLOCA (sizeof *entries->next);
e9ce014c 611 entries = entries->next;
72af86bd 612 memset (entries, 0, sizeof (struct charset_map_entries));
e9ce014c
KH
613 }
614 idx = n_entries % 0x10000;
615 entries->entry[idx].from = from;
616 entries->entry[idx].to = to;
617 entries->entry[idx].c = c;
618 n_entries++;
619 }
76d7b829 620
e9ce014c 621 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 622 SAFE_FREE ();
ac4137cc
KH
623}
624
ecca2aad
KH
625
626/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
627 map it is (see the comment of load_charset_map for the detail). */
628
3263d5a2 629static void
971de7fb 630load_charset (struct charset *charset, int control_flag)
76d7b829 631{
ecca2aad 632 Lisp_Object map;
76d7b829 633
ecca2aad
KH
634 if (inhibit_load_charset_map
635 && temp_charset_work
636 && charset == temp_charset_work->current
56f00ed2 637 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
638 return;
639
640 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
641 map = CHARSET_MAP (charset);
0ac2c299
PE
642 else
643 {
644 if (! CHARSET_UNIFIED_P (charset))
1088b922 645 emacs_abort ();
0ac2c299
PE
646 map = CHARSET_UNIFY_MAP (charset);
647 }
ecca2aad
KH
648 if (STRINGP (map))
649 load_charset_map_from_file (charset, map, control_flag);
650 else
651 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 652}
76d7b829 653
3263d5a2
KH
654
655DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
656 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
5842a27b 657 (Lisp_Object object)
23d2a7f1 658{
3263d5a2 659 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
660}
661
4ed46869 662
a2cb4e63
PE
663static void
664map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
665 Lisp_Object function, Lisp_Object arg,
666 unsigned int from, unsigned int to)
ecca2aad
KH
667{
668 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
669 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
670 Lisp_Object range;
671 int c, stop;
672 struct gcpro gcpro1;
673
674 range = Fcons (Qnil, Qnil);
675 GCPRO1 (range);
676
677 c = temp_charset_work->min_char;
678 stop = (temp_charset_work->max_char < 0x20000
679 ? temp_charset_work->max_char : 0xFFFF);
51b59d79 680
ecca2aad
KH
681 while (1)
682 {
f6095868 683 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
ecca2aad 684
f6095868 685 if (idx >= from_idx && idx <= to_idx)
ecca2aad
KH
686 {
687 if (NILP (XCAR (range)))
688 XSETCAR (range, make_number (c));
689 }
690 else if (! NILP (XCAR (range)))
691 {
692 XSETCDR (range, make_number (c - 1));
693 if (c_function)
694 (*c_function) (arg, range);
695 else
696 call2 (function, range, arg);
697 XSETCAR (range, Qnil);
698 }
699 if (c == stop)
700 {
701 if (c == temp_charset_work->max_char)
702 {
703 if (! NILP (XCAR (range)))
704 {
705 XSETCDR (range, make_number (c));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 }
711 break;
712 }
713 c = 0x1FFFF;
714 stop = temp_charset_work->max_char;
715 }
716 c++;
717 }
c542407d 718 UNGCPRO;
ecca2aad
KH
719}
720
4ed46869 721void
6f704c76
DN
722map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
723 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
4ed46869 724{
3263d5a2 725 Lisp_Object range;
d5172d4f
PE
726 bool partial = (from > CHARSET_MIN_CODE (charset)
727 || to < CHARSET_MAX_CODE (charset));
374c5cfd 728
3263d5a2 729 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 730 {
374c5cfd
KH
731 int from_idx = CODE_POINT_TO_INDEX (charset, from);
732 int to_idx = CODE_POINT_TO_INDEX (charset, to);
733 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
734 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
735
ecca2aad
KH
736 if (CHARSET_UNIFIED_P (charset))
737 {
738 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
739 load_charset (charset, 2);
740 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
741 map_char_table_for_charset (c_function, function,
742 CHARSET_DEUNIFIER (charset), arg,
743 partial ? charset : NULL, from, to);
744 else
745 map_charset_for_dump (c_function, function, arg, from, to);
746 }
747
374c5cfd 748 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 749 if (NILP (function))
5af5dd92 750 (*c_function) (arg, range);
3263d5a2
KH
751 else
752 call2 (function, range, arg);
c83ef371 753 }
3263d5a2
KH
754 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
755 {
756 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
757 load_charset (charset, 2);
758 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
759 map_char_table_for_charset (c_function, function,
760 CHARSET_ENCODER (charset), arg,
761 partial ? charset : NULL, from, to);
762 else
763 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 764 }
374c5cfd 765 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 766 {
374c5cfd
KH
767 Lisp_Object subset_info;
768 int offset;
769
770 subset_info = CHARSET_SUBSET (charset);
771 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
772 offset = XINT (AREF (subset_info, 3));
773 from -= offset;
774 if (from < XFASTINT (AREF (subset_info, 1)))
775 from = XFASTINT (AREF (subset_info, 1));
776 to -= offset;
777 if (to > XFASTINT (AREF (subset_info, 2)))
778 to = XFASTINT (AREF (subset_info, 2));
779 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 780 }
374c5cfd
KH
781 else /* i.e. CHARSET_METHOD_SUPERSET */
782 {
783 Lisp_Object parents;
4ed46869 784
374c5cfd
KH
785 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
786 parents = XCDR (parents))
bbf12bb3 787 {
374c5cfd
KH
788 int offset;
789 unsigned this_from, this_to;
790
791 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
792 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
793 this_from = from > offset ? from - offset : 0;
794 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
795 if (this_from < CHARSET_MIN_CODE (charset))
796 this_from = CHARSET_MIN_CODE (charset);
797 if (this_to > CHARSET_MAX_CODE (charset))
798 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
799 map_charset_chars (c_function, function, arg, charset,
800 this_from, this_to);
bbf12bb3 801 }
35e623fb 802 }
4ed46869
KH
803}
804
374c5cfd 805DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 806 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 807FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 808argument ARG.
4ed46869 809
374c5cfd
KH
810RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
811characters contained in CHARSET.
4ed46869 812
374c5cfd 813The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 814range of code points (in CHARSET) of target characters. */)
5842a27b 815 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
4ed46869 816{
374c5cfd 817 struct charset *cs;
16fed1fc 818 unsigned from, to;
4ed46869 819
374c5cfd
KH
820 CHECK_CHARSET_GET_CHARSET (charset, cs);
821 if (NILP (from_code))
16fed1fc 822 from = CHARSET_MIN_CODE (cs);
970b7474 823 else
4ed46869 824 {
970b7474
KH
825 from = XINT (from_code);
826 if (from < CHARSET_MIN_CODE (cs))
827 from = CHARSET_MIN_CODE (cs);
4ed46869 828 }
374c5cfd 829 if (NILP (to_code))
970b7474 830 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
831 else
832 {
970b7474
KH
833 to = XINT (to_code);
834 if (to > CHARSET_MAX_CODE (cs))
835 to = CHARSET_MAX_CODE (cs);
4ed46869 836 }
16fed1fc 837 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 838 return Qnil;
35e623fb 839}
4ed46869 840
4ed46869 841
3263d5a2
KH
842/* Define a charset according to the arguments. The Nth argument is
843 the Nth attribute of the charset (the last attribute `charset-id'
844 is not included). See the docstring of `define-charset' for the
845 detail. */
4ed46869 846
3263d5a2
KH
847DEFUN ("define-charset-internal", Fdefine_charset_internal,
848 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
849 doc: /* For internal use only.
850usage: (define-charset-internal ...) */)
f66c7cf8 851 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 852{
3263d5a2
KH
853 /* Charset attr vector. */
854 Lisp_Object attrs;
855 Lisp_Object val;
0de4bb68 856 EMACS_UINT hash_code;
3263d5a2 857 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 858 int i, j;
3263d5a2
KH
859 struct charset charset;
860 int id;
861 int dimension;
d5172d4f 862 bool new_definition_p;
3263d5a2
KH
863 int nchars;
864
865 if (nargs != charset_arg_max)
866 return Fsignal (Qwrong_number_of_arguments,
867 Fcons (intern ("define-charset-internal"),
868 make_number (nargs)));
869
870 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
871
872 CHECK_SYMBOL (args[charset_arg_name]);
873 ASET (attrs, charset_name, args[charset_arg_name]);
874
875 val = args[charset_arg_code_space];
c032b5f8 876 for (i = 0, dimension = 0, nchars = 1; ; i++)
76d7b829 877 {
d311d28c 878 Lisp_Object min_byte_obj, max_byte_obj;
3263d5a2
KH
879 int min_byte, max_byte;
880
d311d28c
PE
881 min_byte_obj = Faref (val, make_number (i * 2));
882 max_byte_obj = Faref (val, make_number (i * 2 + 1));
af5a5a98 883 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
d311d28c 884 min_byte = XINT (min_byte_obj);
af5a5a98 885 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
d311d28c 886 max_byte = XINT (max_byte_obj);
3263d5a2
KH
887 charset.code_space[i * 4] = min_byte;
888 charset.code_space[i * 4 + 1] = max_byte;
889 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
3263d5a2
KH
890 if (max_byte > 0)
891 dimension = i + 1;
c032b5f8
PE
892 if (i == 3)
893 break;
894 nchars *= charset.code_space[i * 4 + 2];
895 charset.code_space[i * 4 + 3] = nchars;
3263d5a2 896 }
4ed46869 897
3263d5a2
KH
898 val = args[charset_arg_dimension];
899 if (NILP (val))
900 charset.dimension = dimension;
901 else
4ed46869 902 {
af5a5a98 903 CHECK_RANGED_INTEGER (val, 1, 4);
3263d5a2 904 charset.dimension = XINT (val);
4ed46869
KH
905 }
906
3263d5a2
KH
907 charset.code_linear_p
908 = (charset.dimension == 1
909 || (charset.code_space[2] == 256
910 && (charset.dimension == 2
911 || (charset.code_space[6] == 256
912 && (charset.dimension == 3
913 || charset.code_space[10] == 256)))));
914
69f8de5b 915 if (! charset.code_linear_p)
4ed46869 916 {
23f86fce 917 charset.code_space_mask = xzalloc (256);
69f8de5b
KH
918 for (i = 0; i < 4; i++)
919 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
920 j++)
921 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
922 }
923
3263d5a2 924 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 925
3263d5a2
KH
926 charset.min_code = (charset.code_space[0]
927 | (charset.code_space[4] << 8)
928 | (charset.code_space[8] << 16)
3c7649c1 929 | ((unsigned) charset.code_space[12] << 24));
3263d5a2
KH
930 charset.max_code = (charset.code_space[1]
931 | (charset.code_space[5] << 8)
932 | (charset.code_space[9] << 16)
3c7649c1 933 | ((unsigned) charset.code_space[13] << 24));
820ee249 934 charset.char_index_offset = 0;
8a73a704 935
820ee249
KH
936 val = args[charset_arg_min_code];
937 if (! NILP (val))
938 {
be44ca6c 939 unsigned code = cons_to_unsigned (val, UINT_MAX);
fdb82f93 940
820ee249
KH
941 if (code < charset.min_code
942 || code > charset.max_code)
3c7649c1
PE
943 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
944 make_fixnum_or_float (charset.max_code), val);
820ee249
KH
945 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
946 charset.min_code = code;
947 }
3fac5a51 948
820ee249
KH
949 val = args[charset_arg_max_code];
950 if (! NILP (val))
3fac5a51 951 {
be44ca6c 952 unsigned code = cons_to_unsigned (val, UINT_MAX);
820ee249 953
820ee249
KH
954 if (code < charset.min_code
955 || code > charset.max_code)
3c7649c1
PE
956 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
957 make_fixnum_or_float (charset.max_code), val);
820ee249 958 charset.max_code = code;
3fac5a51 959 }
3fac5a51 960
ecca2aad 961 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 962
3263d5a2
KH
963 val = args[charset_arg_invalid_code];
964 if (NILP (val))
965 {
966 if (charset.min_code > 0)
967 charset.invalid_code = 0;
bbf12bb3
KH
968 else
969 {
3c7649c1 970 if (charset.max_code < UINT_MAX)
3263d5a2
KH
971 charset.invalid_code = charset.max_code + 1;
972 else
973 error ("Attribute :invalid-code must be specified");
76d7b829 974 }
76d7b829 975 }
3263d5a2 976 else
3c7649c1 977 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
4ed46869 978
3263d5a2
KH
979 val = args[charset_arg_iso_final];
980 if (NILP (val))
981 charset.iso_final = -1;
982 else
983 {
984 CHECK_NUMBER (val);
985 if (XINT (val) < '0' || XINT (val) > 127)
c2982e87 986 error ("Invalid iso-final-char: %"pI"d", XINT (val));
3263d5a2
KH
987 charset.iso_final = XINT (val);
988 }
4ed46869 989
3263d5a2
KH
990 val = args[charset_arg_iso_revision];
991 if (NILP (val))
992 charset.iso_revision = -1;
993 else
4ed46869 994 {
af5a5a98 995 CHECK_RANGED_INTEGER (val, -1, 63);
3263d5a2 996 charset.iso_revision = XINT (val);
4ed46869 997 }
4ed46869 998
3263d5a2
KH
999 val = args[charset_arg_emacs_mule_id];
1000 if (NILP (val))
1001 charset.emacs_mule_id = -1;
4ed46869
KH
1002 else
1003 {
3263d5a2
KH
1004 CHECK_NATNUM (val);
1005 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
c2982e87 1006 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
3263d5a2 1007 charset.emacs_mule_id = XINT (val);
c83ef371 1008 }
f6302ac9 1009
3263d5a2 1010 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1011
3263d5a2 1012 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1013
3263d5a2
KH
1014 charset.unified_p = 0;
1015
72af86bd 1016 memset (charset.fast_map, 0, sizeof (charset.fast_map));
3263d5a2
KH
1017
1018 if (! NILP (args[charset_arg_code_offset]))
1019 {
1020 val = args[charset_arg_code_offset];
3c7649c1 1021 CHECK_CHARACTER (val);
3263d5a2
KH
1022
1023 charset.method = CHARSET_METHOD_OFFSET;
1024 charset.code_offset = XINT (val);
1025
3263d5a2 1026 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
3c7649c1 1027 if (MAX_CHAR - charset.code_offset < i)
3263d5a2 1028 error ("Unsupported max char: %d", charset.max_char);
3c7649c1
PE
1029 charset.max_char = i + charset.code_offset;
1030 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1031 charset.min_char = i + charset.code_offset;
3263d5a2 1032
f148205f
KH
1033 i = (charset.min_char >> 7) << 7;
1034 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1035 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1036 i = (i >> 12) << 12;
3263d5a2
KH
1037 for (; i <= charset.max_char; i += 0x1000)
1038 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1039 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1040 charset.ascii_compatible_p = 1;
3263d5a2
KH
1041 }
1042 else if (! NILP (args[charset_arg_map]))
1043 {
1044 val = args[charset_arg_map];
1045 ASET (attrs, charset_map, val);
ecca2aad 1046 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1047 }
374c5cfd 1048 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1049 {
374c5cfd
KH
1050 Lisp_Object parent;
1051 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1052 struct charset *parent_charset;
1053
1054 val = args[charset_arg_subset];
1055 parent = Fcar (val);
1056 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1057 parent_min_code = Fnth (make_number (1), val);
1058 CHECK_NATNUM (parent_min_code);
1059 parent_max_code = Fnth (make_number (2), val);
1060 CHECK_NATNUM (parent_max_code);
1061 parent_code_offset = Fnth (make_number (3), val);
1062 CHECK_NUMBER (parent_code_offset);
25721f5b 1063 val = make_uninit_vector (4);
374c5cfd
KH
1064 ASET (val, 0, make_number (parent_charset->id));
1065 ASET (val, 1, parent_min_code);
1066 ASET (val, 2, parent_max_code);
1067 ASET (val, 3, parent_code_offset);
1068 ASET (attrs, charset_subset, val);
1069
1070 charset.method = CHARSET_METHOD_SUBSET;
1071 /* Here, we just copy the parent's fast_map. It's not accurate,
1072 but at least it works for quickly detecting which character
1073 DOESN'T belong to this charset. */
1074 for (i = 0; i < 190; i++)
1075 charset.fast_map[i] = parent_charset->fast_map[i];
1076
1077 /* We also copy these for parents. */
1078 charset.min_char = parent_charset->min_char;
1079 charset.max_char = parent_charset->max_char;
1080 }
1081 else if (! NILP (args[charset_arg_superset]))
0282eb69 1082 {
374c5cfd
KH
1083 val = args[charset_arg_superset];
1084 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1085 val = Fcopy_sequence (val);
374c5cfd 1086 ASET (attrs, charset_superset, val);
3263d5a2
KH
1087
1088 charset.min_char = MAX_CHAR;
1089 charset.max_char = 0;
1090 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1091 {
3263d5a2
KH
1092 Lisp_Object elt, car_part, cdr_part;
1093 int this_id, offset;
1094 struct charset *this_charset;
2e344af3 1095
3263d5a2
KH
1096 elt = Fcar (val);
1097 if (CONSP (elt))
2e344af3 1098 {
3263d5a2
KH
1099 car_part = XCAR (elt);
1100 cdr_part = XCDR (elt);
1101 CHECK_CHARSET_GET_ID (car_part, this_id);
d311d28c 1102 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
3263d5a2 1103 offset = XINT (cdr_part);
177c0ea7 1104 }
3263d5a2 1105 else
4ed46869 1106 {
3263d5a2
KH
1107 CHECK_CHARSET_GET_ID (elt, this_id);
1108 offset = 0;
4ed46869 1109 }
3263d5a2
KH
1110 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1111
1112 this_charset = CHARSET_FROM_ID (this_id);
1113 if (charset.min_char > this_charset->min_char)
1114 charset.min_char = this_charset->min_char;
1115 if (charset.max_char < this_charset->max_char)
1116 charset.max_char = this_charset->max_char;
1117 for (i = 0; i < 190; i++)
1118 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1119 }
0282eb69 1120 }
2e344af3 1121 else
3263d5a2 1122 error ("None of :code-offset, :map, :parents are specified");
05505664 1123
3263d5a2
KH
1124 val = args[charset_arg_unify_map];
1125 if (! NILP (val) && !STRINGP (val))
1126 CHECK_VECTOR (val);
1127 ASET (attrs, charset_unify_map, val);
05505664 1128
3263d5a2
KH
1129 CHECK_LIST (args[charset_arg_plist]);
1130 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1131
3263d5a2
KH
1132 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1133 &hash_code);
1134 if (charset.hash_index >= 0)
1135 {
1136 new_definition_p = 0;
4f65af01 1137 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
e83064be 1138 set_hash_value_slot (hash_table, charset.hash_index, attrs);
3263d5a2 1139 }
1a45ff10 1140 else
3263d5a2
KH
1141 {
1142 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1143 hash_code);
1144 if (charset_table_used == charset_table_size)
1145 {
e097a6fa 1146 /* Ensure that charset IDs fit into 'int' as well as into the
0065d054
PE
1147 restriction imposed by fixnums. Although the 'int' restriction
1148 could be removed, too much other code would need altering; for
1149 example, the IDs are stuffed into struct
1150 coding_system.charbuf[i] entries, which are 'int'. */
1151 int old_size = charset_table_size;
2dd2e622 1152 ptrdiff_t new_size = old_size;
0065d054 1153 struct charset *new_table =
2dd2e622 1154 xpalloc (0, &new_size, 1,
0065d054
PE
1155 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1156 sizeof *charset_table);
1157 memcpy (new_table, charset_table, old_size * sizeof *new_table);
2fe1edd1 1158 charset_table = new_table;
2dd2e622 1159 charset_table_size = new_size;
f701dc2a
PE
1160 /* FIXME: This leaks memory, as the old charset_table becomes
1161 unreachable. If the old charset table is charset_table_init
1162 then this leak is intentional; otherwise, it's unclear.
1163 If the latter memory leak is intentional, a
0065d054
PE
1164 comment should be added to explain this. If not, the old
1165 charset_table should be freed, by passing it as the 1st argument
1166 to xpalloc and removing the memcpy. */
3263d5a2
KH
1167 }
1168 id = charset_table_used++;
3263d5a2
KH
1169 new_definition_p = 1;
1170 }
2e344af3 1171
4f65af01 1172 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1173 charset.id = id;
1174 charset_table[id] = charset;
2e344af3 1175
ecca2aad 1176 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1177 {
1178 load_charset (&charset, 0);
1179 charset_table[id] = charset;
1180 }
ecca2aad 1181
3263d5a2 1182 if (charset.iso_final >= 0)
4ed46869 1183 {
3263d5a2
KH
1184 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1185 charset.iso_final) = id;
1186 if (new_definition_p)
1187 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
6c6f1994 1188 list1 (make_number (id)));
7c7dceee
KH
1189 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1190 charset_jisx0201_roman = id;
1191 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1192 charset_jisx0208_1978 = id;
1193 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1194 charset_jisx0208 = id;
d32320c4
KH
1195 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1196 charset_ksc5601 = id;
4ed46869 1197 }
d0cf2d48 1198
3263d5a2 1199 if (charset.emacs_mule_id >= 0)
4ed46869 1200 {
b84ae584 1201 emacs_mule_charset[charset.emacs_mule_id] = id;
4f65af01
KH
1202 if (charset.emacs_mule_id < 0xA0)
1203 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1204 else
1205 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1206 if (new_definition_p)
1207 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
6c6f1994 1208 list1 (make_number (id)));
4ed46869
KH
1209 }
1210
3263d5a2
KH
1211 if (new_definition_p)
1212 {
1213 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1214 if (charset.supplementary_p)
1215 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
6c6f1994 1216 list1 (make_number (id)));
8055c66a 1217 else
880820fe
KH
1218 {
1219 Lisp_Object tail;
1220
1221 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1222 {
1223 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1224
1225 if (cs->supplementary_p)
1226 break;
1227 }
1228 if (EQ (tail, Vcharset_ordered_list))
1229 Vcharset_ordered_list = Fcons (make_number (id),
1230 Vcharset_ordered_list);
1231 else if (NILP (tail))
1232 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
6c6f1994 1233 list1 (make_number (id)));
880820fe
KH
1234 else
1235 {
1236 val = Fcons (XCAR (tail), XCDR (tail));
1237 XSETCDR (tail, val);
1238 XSETCAR (tail, make_number (id));
1239 }
1240 }
dbbb237d 1241 charset_ordered_list_tick++;
3263d5a2 1242 }
4ed46869 1243
3263d5a2 1244 return Qnil;
4ed46869
KH
1245}
1246
2fe1edd1
KH
1247
1248/* Same as Fdefine_charset_internal but arguments are more convenient
1249 to call from C (typically in syms_of_charset). This can define a
1250 charset of `offset' method only. Return the ID of the new
1251 charset. */
1252
1253static int
dd4c5104
DN
1254define_charset_internal (Lisp_Object name,
1255 int dimension,
dfb6afda 1256 const char *code_space_chars,
dd4c5104
DN
1257 unsigned min_code, unsigned max_code,
1258 int iso_final, int iso_revision, int emacs_mule_id,
d5172d4f 1259 bool ascii_compatible, bool supplementary,
dd4c5104 1260 int code_offset)
2fe1edd1 1261{
dfb6afda 1262 const unsigned char *code_space = (const unsigned char *) code_space_chars;
2fe1edd1 1263 Lisp_Object args[charset_arg_max];
2fe1edd1
KH
1264 Lisp_Object val;
1265 int i;
1266
1267 args[charset_arg_name] = name;
1268 args[charset_arg_dimension] = make_number (dimension);
25721f5b 1269 val = make_uninit_vector (8);
2fe1edd1
KH
1270 for (i = 0; i < 8; i++)
1271 ASET (val, i, make_number (code_space[i]));
1272 args[charset_arg_code_space] = val;
1273 args[charset_arg_min_code] = make_number (min_code);
1274 args[charset_arg_max_code] = make_number (max_code);
1275 args[charset_arg_iso_final]
1276 = (iso_final < 0 ? Qnil : make_number (iso_final));
1277 args[charset_arg_iso_revision] = make_number (iso_revision);
1278 args[charset_arg_emacs_mule_id]
1279 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1280 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1281 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1282 args[charset_arg_invalid_code] = Qnil;
1283 args[charset_arg_code_offset] = make_number (code_offset);
1284 args[charset_arg_map] = Qnil;
1285 args[charset_arg_subset] = Qnil;
1286 args[charset_arg_superset] = Qnil;
1287 args[charset_arg_unify_map] = Qnil;
1288
694b6c97 1289 args[charset_arg_plist] =
3438fe21 1290 listn (CONSTYPE_HEAP, 14,
694b6c97
DA
1291 intern_c_string (":name"),
1292 args[charset_arg_name],
1293 intern_c_string (":dimension"),
1294 args[charset_arg_dimension],
1295 intern_c_string (":code-space"),
1296 args[charset_arg_code_space],
1297 intern_c_string (":iso-final-char"),
1298 args[charset_arg_iso_final],
1299 intern_c_string (":emacs-mule-id"),
1300 args[charset_arg_emacs_mule_id],
1301 intern_c_string (":ascii-compatible-p"),
1302 args[charset_arg_ascii_compatible_p],
1303 intern_c_string (":code-offset"),
1304 args[charset_arg_code_offset]);
2fe1edd1
KH
1305 Fdefine_charset_internal (charset_arg_max, args);
1306
1307 return XINT (CHARSET_SYMBOL_ID (name));
1308}
1309
1310
3263d5a2
KH
1311DEFUN ("define-charset-alias", Fdefine_charset_alias,
1312 Sdefine_charset_alias, 2, 2, 0,
1313 doc: /* Define ALIAS as an alias for charset CHARSET. */)
5842a27b 1314 (Lisp_Object alias, Lisp_Object charset)
4ed46869 1315{
3263d5a2
KH
1316 Lisp_Object attr;
1317
1318 CHECK_CHARSET_GET_ATTR (charset, attr);
1319 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1320 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1321 return Qnil;
1322}
4ed46869 1323
4ed46869 1324
3263d5a2 1325DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1326 doc: /* Return the property list of CHARSET. */)
5842a27b 1327 (Lisp_Object charset)
3263d5a2
KH
1328{
1329 Lisp_Object attrs;
1330
1331 CHECK_CHARSET_GET_ATTR (charset, attrs);
1332 return CHARSET_ATTR_PLIST (attrs);
1333}
1334
1335
1336DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1337 doc: /* Set CHARSET's property list to PLIST. */)
5842a27b 1338 (Lisp_Object charset, Lisp_Object plist)
3263d5a2
KH
1339{
1340 Lisp_Object attrs;
1341
1342 CHECK_CHARSET_GET_ATTR (charset, attrs);
4939150c 1343 ASET (attrs, charset_plist, plist);
3263d5a2
KH
1344 return plist;
1345}
1346
1347
dbbb237d 1348DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1349 doc: /* Unify characters of CHARSET with Unicode.
1350This means reading the relevant file and installing the table defined
dbbb237d
KH
1351by CHARSET's `:unify-map' property.
1352
64165ae2
DL
1353Optional second arg UNIFY-MAP is a file name string or a vector. It has
1354the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1355`define-charset' (which see).
1356
1357Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
5842a27b 1358 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
8a73a704 1359{
3263d5a2
KH
1360 int id;
1361 struct charset *cs;
8f924df7 1362
3263d5a2
KH
1363 CHECK_CHARSET_GET_ID (charset, id);
1364 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1365 if (NILP (deunify)
1366 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1367 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1368 return Qnil;
dbbb237d 1369
3263d5a2 1370 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1371 if (NILP (deunify))
1372 {
ecca2aad
KH
1373 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1374 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1375 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1376 if (NILP (unify_map))
1377 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1378 else
ecca2aad
KH
1379 {
1380 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1381 signal_error ("Bad unify-map", unify_map);
4939150c 1382 set_charset_attr (cs, charset_unify_map, unify_map);
ecca2aad
KH
1383 }
1384 if (NILP (Vchar_unify_table))
1385 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1386 char_table_set_range (Vchar_unify_table,
1387 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1388 CHARSET_UNIFIED_P (cs) = 1;
1389 }
1390 else if (CHAR_TABLE_P (Vchar_unify_table))
1391 {
3c7649c1
PE
1392 unsigned min_code = CHARSET_MIN_CODE (cs);
1393 unsigned max_code = CHARSET_MAX_CODE (cs);
dbbb237d
KH
1394 int min_char = DECODE_CHAR (cs, min_code);
1395 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1396
dbbb237d
KH
1397 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1398 }
8f924df7 1399
3263d5a2 1400 return Qnil;
8a73a704
KH
1401}
1402
3fac5a51
KH
1403DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1404 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1405 doc: /*
d0cf2d48 1406Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1407DIMENSION is the number of bytes to represent a character: 1 or 2.
1408CHARS is the number of characters in a dimension: 94 or 96.
1409
1410This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1411If there's no unused final char for the specified kind of charset,
fdb82f93 1412return nil. */)
5842a27b 1413 (Lisp_Object dimension, Lisp_Object chars)
3fac5a51
KH
1414{
1415 int final_char;
1416
b7826503
PJ
1417 CHECK_NUMBER (dimension);
1418 CHECK_NUMBER (chars);
3263d5a2
KH
1419 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1420 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1421 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1422 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1423 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1424 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1425 break;
3fac5a51
KH
1426 return (final_char <= '?' ? make_number (final_char) : Qnil);
1427}
1428
3263d5a2 1429static void
971de7fb 1430check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1431{
3263d5a2
KH
1432 CHECK_NATNUM (dimension);
1433 CHECK_NATNUM (chars);
e6c3da20 1434 CHECK_CHARACTER (final_char);
4ed46869 1435
3263d5a2 1436 if (XINT (dimension) > 3)
c2982e87 1437 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
5fdb398c 1438 XINT (dimension));
4ed46869 1439 if (XINT (chars) != 94 && XINT (chars) != 96)
c2982e87 1440 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
3263d5a2 1441 if (XINT (final_char) < '0' || XINT (final_char) > '~')
e6c3da20
EZ
1442 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1443 (int)XINT (final_char));
3263d5a2
KH
1444}
1445
1446
1447DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1448 4, 4, 0,
cefd8c4f
KH
1449 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1450
1451On decoding by an ISO-2022 base coding system, when a charset
1452specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1453if CHARSET is designated instead. */)
5842a27b 1454 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
3263d5a2
KH
1455{
1456 int id;
d5172d4f 1457 bool chars_flag;
4ed46869 1458
3263d5a2
KH
1459 CHECK_CHARSET_GET_ID (charset, id);
1460 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1461 chars_flag = XINT (chars) == 96;
1462 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1463 return Qnil;
1464}
1465
3263d5a2 1466
2e344af3
KH
1467/* Return information about charsets in the text at PTR of NBYTES
1468 bytes, which are NCHARS characters. The value is:
f6302ac9 1469
cfe34140 1470 0: Each character is represented by one byte. This is always
3263d5a2
KH
1471 true for a unibyte string. For a multibyte string, true if
1472 it contains only ASCII characters.
1473
28c026cd
DL
1474 1: No charsets other than ascii, control-1, and latin-1 are
1475 found.
1d67c29b 1476
3263d5a2
KH
1477 2: Otherwise.
1478*/
4ed46869
KH
1479
1480int
971de7fb 1481string_xstring_p (Lisp_Object string)
4ed46869 1482{
8f924df7
KH
1483 const unsigned char *p = SDATA (string);
1484 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1485
8f924df7 1486 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1487 return 0;
1488
3263d5a2 1489 while (p < endp)
0282eb69 1490 {
3263d5a2 1491 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1492
3cc67a4d 1493 if (c >= 0x100)
3263d5a2 1494 return 2;
0282eb69 1495 }
3263d5a2
KH
1496 return 1;
1497}
05505664 1498
05505664 1499
3263d5a2 1500/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1501
3cc67a4d
KH
1502 CHARSETS is a vector. If Nth element is non-nil, it means the
1503 charset whose id is N is already found.
2e344af3 1504
3263d5a2 1505 It may lookup a translation table TABLE if supplied. */
2e344af3 1506
3263d5a2 1507static void
d5172d4f
PE
1508find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1509 ptrdiff_t nbytes, Lisp_Object charsets,
1510 Lisp_Object table, bool multibyte)
3263d5a2 1511{
dbbb237d 1512 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1513
1514 if (nchars == nbytes)
3263d5a2 1515 {
3cc67a4d
KH
1516 if (multibyte)
1517 ASET (charsets, charset_ascii, Qt);
1518 else
1519 while (ptr < pend)
1520 {
1521 int c = *ptr++;
1522
1523 if (!NILP (table))
1524 c = translate_char (table, c);
200fc949 1525 if (ASCII_CHAR_P (c))
3cc67a4d
KH
1526 ASET (charsets, charset_ascii, Qt);
1527 else
1528 ASET (charsets, charset_eight_bit, Qt);
1529 }
1530 }
1531 else
1532 {
1533 while (ptr < pend)
3263d5a2 1534 {
3cc67a4d
KH
1535 int c = STRING_CHAR_ADVANCE (ptr);
1536 struct charset *charset;
3263d5a2 1537
3cc67a4d
KH
1538 if (!NILP (table))
1539 c = translate_char (table, c);
1540 charset = CHAR_CHARSET (c);
1541 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1542 }
4ed46869 1543 }
4ed46869
KH
1544}
1545
1546DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1547 2, 3, 0,
fdb82f93
PJ
1548 doc: /* Return a list of charsets in the region between BEG and END.
1549BEG and END are buffer positions.
1550Optional arg TABLE if non-nil is a translation table to look up.
1551
fdb82f93
PJ
1552If the current buffer is unibyte, the returned list may contain
1553only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1554 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
4ed46869 1555{
3263d5a2 1556 Lisp_Object charsets;
d311d28c 1557 ptrdiff_t from, from_byte, to, stop, stop_byte;
42ca828e 1558 int i;
4ed46869 1559 Lisp_Object val;
d5172d4f 1560 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
4ed46869
KH
1561
1562 validate_region (&beg, &end);
1563 from = XFASTINT (beg);
1564 stop = to = XFASTINT (end);
6ae1f27e 1565
4ed46869 1566 if (from < GPT && GPT < to)
6ae1f27e
RS
1567 {
1568 stop = GPT;
1569 stop_byte = GPT_BYTE;
1570 }
1571 else
1572 stop_byte = CHAR_TO_BYTE (stop);
1573
1574 from_byte = CHAR_TO_BYTE (from);
1575
3263d5a2 1576 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1577 while (1)
1578 {
3263d5a2 1579 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1580 stop_byte - from_byte, charsets, table,
1581 multibyte);
4ed46869 1582 if (stop < to)
6ae1f27e
RS
1583 {
1584 from = stop, from_byte = stop_byte;
1585 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1586 }
4ed46869
KH
1587 else
1588 break;
1589 }
6ae1f27e 1590
4ed46869 1591 val = Qnil;
3263d5a2 1592 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1593 if (!NILP (AREF (charsets, i)))
3263d5a2 1594 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1595 return val;
1596}
1597
1598DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1599 1, 2, 0,
fdb82f93
PJ
1600 doc: /* Return a list of charsets in STR.
1601Optional arg TABLE if non-nil is a translation table to look up.
1602
fdb82f93 1603If STR is unibyte, the returned list may contain
3263d5a2 1604only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1605 (Lisp_Object str, Lisp_Object table)
4ed46869 1606{
3263d5a2 1607 Lisp_Object charsets;
4ed46869
KH
1608 int i;
1609 Lisp_Object val;
1610
b7826503 1611 CHECK_STRING (str);
87b089ad 1612
3263d5a2 1613 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1614 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1615 charsets, table,
1616 STRING_MULTIBYTE (str));
4ed46869 1617 val = Qnil;
3263d5a2 1618 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1619 if (!NILP (AREF (charsets, i)))
3263d5a2 1620 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1621 return val;
1622}
2e344af3 1623
4ed46869 1624\f
3263d5a2 1625
ecca2aad
KH
1626/* Return a unified character code for C (>= 0x110000). VAL is a
1627 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1628 charset symbol. */
41c8bfcf 1629static int
971de7fb 1630maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1631{
1632 struct charset *charset;
1633
1634 if (INTEGERP (val))
ccd9a01a 1635 return XFASTINT (val);
ecca2aad
KH
1636 if (NILP (val))
1637 return c;
1638
1639 CHECK_CHARSET_GET_CHARSET (val, charset);
291d430f 1640#ifdef REL_ALLOC
efc00ab1 1641 /* The call to load_charset below can allocate memory, which screws
291d430f
EZ
1642 callers of this function through STRING_CHAR_* macros that hold C
1643 pointers to buffer text, if REL_ALLOC is used. */
1644 r_alloc_inhibit_buffer_relocation (1);
1645#endif
ecca2aad
KH
1646 load_charset (charset, 1);
1647 if (! inhibit_load_charset_map)
1648 {
1649 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1650 if (! NILP (val))
ccd9a01a 1651 c = XFASTINT (val);
ecca2aad
KH
1652 }
1653 else
1654 {
1655 int code_index = c - CHARSET_CODE_OFFSET (charset);
1656 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1657
1658 if (unified > 0)
1659 c = unified;
1660 }
291d430f
EZ
1661#ifdef REL_ALLOC
1662 r_alloc_inhibit_buffer_relocation (0);
1663#endif
ecca2aad
KH
1664 return c;
1665}
1666
1667
bbd240ce 1668/* Return a character corresponding to the code-point CODE of
3263d5a2
KH
1669 CHARSET. */
1670
1671int
971de7fb 1672decode_char (struct charset *charset, unsigned int code)
4ed46869 1673{
3263d5a2
KH
1674 int c, char_index;
1675 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1676
3263d5a2
KH
1677 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1678 return -1;
4ed46869 1679
374c5cfd 1680 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1681 {
374c5cfd
KH
1682 Lisp_Object subset_info;
1683
1684 subset_info = CHARSET_SUBSET (charset);
1685 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1686 code -= XINT (AREF (subset_info, 3));
1687 if (code < XFASTINT (AREF (subset_info, 1))
1688 || code > XFASTINT (AREF (subset_info, 2)))
1689 c = -1;
1690 else
1691 c = DECODE_CHAR (charset, code);
2e344af3 1692 }
374c5cfd 1693 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1694 {
3263d5a2 1695 Lisp_Object parents;
4ed46869 1696
374c5cfd 1697 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1698 c = -1;
1699 for (; CONSP (parents); parents = XCDR (parents))
1700 {
1701 int id = XINT (XCAR (XCAR (parents)));
1702 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1703 unsigned this_code = code - code_offset;
4ed46869 1704
3263d5a2
KH
1705 charset = CHARSET_FROM_ID (id);
1706 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1707 break;
1708 }
1709 }
1710 else
ac4137cc 1711 {
3263d5a2 1712 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1713 if (char_index < 0)
1714 return -1;
4ed46869 1715
3263d5a2 1716 if (method == CHARSET_METHOD_MAP)
ac4137cc 1717 {
3263d5a2 1718 Lisp_Object decoder;
4ed46869 1719
3263d5a2
KH
1720 decoder = CHARSET_DECODER (charset);
1721 if (! VECTORP (decoder))
ecca2aad
KH
1722 {
1723 load_charset (charset, 1);
1724 decoder = CHARSET_DECODER (charset);
1725 }
1726 if (VECTORP (decoder))
1727 c = XINT (AREF (decoder, char_index));
1728 else
1729 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1730 }
ecca2aad 1731 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1732 {
3263d5a2 1733 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad 1734 if (CHARSET_UNIFIED_P (charset)
41c8bfcf
PE
1735 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1736 {
1737 /* Unify C with a Unicode character if possible. */
1738 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1739 c = maybe_unify_char (c, val);
1740 }
ac4137cc
KH
1741 }
1742 }
4ed46869 1743
3263d5a2 1744 return c;
90d7b74e
KH
1745}
1746
374c5cfd
KH
1747/* Variable used temporarily by the macro ENCODE_CHAR. */
1748Lisp_Object charset_work;
4ed46869 1749
d5172d4f 1750/* Return a code-point of C in CHARSET. If C doesn't belong to
28c026cd
DL
1751 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1752 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1753
3263d5a2 1754unsigned
971de7fb 1755encode_char (struct charset *charset, int c)
9d3d8cba 1756{
3263d5a2
KH
1757 unsigned code;
1758 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1759
3263d5a2 1760 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1761 {
6809ca75 1762 Lisp_Object deunifier;
ecca2aad 1763 int code_index = -1;
4ed46869 1764
3263d5a2
KH
1765 deunifier = CHARSET_DEUNIFIER (charset);
1766 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1767 {
ecca2aad 1768 load_charset (charset, 2);
3263d5a2 1769 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1770 }
ecca2aad
KH
1771 if (CHAR_TABLE_P (deunifier))
1772 {
1773 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1774
1775 if (INTEGERP (deunified))
1776 code_index = XINT (deunified);
1777 }
1778 else
1779 {
1780 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1781 }
1782 if (code_index >= 0)
1783 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1784 }
9d3d8cba 1785
374c5cfd
KH
1786 if (method == CHARSET_METHOD_SUBSET)
1787 {
1788 Lisp_Object subset_info;
1789 struct charset *this_charset;
1790
1791 subset_info = CHARSET_SUBSET (charset);
1792 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1793 code = ENCODE_CHAR (this_charset, c);
1794 if (code == CHARSET_INVALID_CODE (this_charset)
1795 || code < XFASTINT (AREF (subset_info, 1))
1796 || code > XFASTINT (AREF (subset_info, 2)))
1797 return CHARSET_INVALID_CODE (charset);
1798 code += XINT (AREF (subset_info, 3));
1799 return code;
1800 }
9d3d8cba 1801
374c5cfd 1802 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1803 {
3263d5a2 1804 Lisp_Object parents;
d2665018 1805
374c5cfd 1806 parents = CHARSET_SUPERSET (charset);
3263d5a2 1807 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1808 {
3263d5a2
KH
1809 int id = XINT (XCAR (XCAR (parents)));
1810 int code_offset = XINT (XCDR (XCAR (parents)));
1811 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1812
3263d5a2 1813 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1814 if (code != CHARSET_INVALID_CODE (this_charset))
1815 return code + code_offset;
beeedaad 1816 }
3263d5a2
KH
1817 return CHARSET_INVALID_CODE (charset);
1818 }
1bcc1567 1819
15c85a88
KH
1820 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1821 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1822 return CHARSET_INVALID_CODE (charset);
1bcc1567 1823
3263d5a2 1824 if (method == CHARSET_METHOD_MAP)
3f62427c 1825 {
3263d5a2 1826 Lisp_Object encoder;
beeedaad 1827 Lisp_Object val;
9b6a601f 1828
3263d5a2
KH
1829 encoder = CHARSET_ENCODER (charset);
1830 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1831 {
1832 load_charset (charset, 2);
1833 encoder = CHARSET_ENCODER (charset);
1834 }
1835 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1836 {
1837 val = CHAR_TABLE_REF (encoder, c);
1838 if (NILP (val))
1839 return CHARSET_INVALID_CODE (charset);
1840 code = XINT (val);
1841 if (! CHARSET_COMPACT_CODES_P (charset))
1842 code = INDEX_TO_CODE_POINT (charset, code);
1843 }
1844 else
1845 {
1846 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
51b59d79 1847 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad 1848 }
3263d5a2 1849 }
820ee249 1850 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1851 {
58c8a77d
PE
1852 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1853
1854 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1855 }
8ac5a9cc 1856
3263d5a2 1857 return code;
4ed46869
KH
1858}
1859
4ed46869 1860
3263d5a2
KH
1861DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1862 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1863Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1864
328a8179 1865CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
5842a27b 1866 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
4ed46869 1867{
3263d5a2
KH
1868 int c, id;
1869 unsigned code;
1870 struct charset *charsetp;
859f2b3c 1871
3263d5a2 1872 CHECK_CHARSET_GET_ID (charset, id);
be44ca6c 1873 code = cons_to_unsigned (code_point, UINT_MAX);
3263d5a2
KH
1874 charsetp = CHARSET_FROM_ID (id);
1875 c = DECODE_CHAR (charsetp, code);
1876 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1877}
1878
859f2b3c 1879
3263d5a2
KH
1880DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1881 doc: /* Encode the character CH into a code-point of CHARSET.
328a8179 1882Return nil if CHARSET doesn't include CH. */)
5842a27b 1883 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
4ed46869 1884{
34206dd2 1885 int c, id;
3263d5a2
KH
1886 unsigned code;
1887 struct charset *charsetp;
046b1f03 1888
3263d5a2 1889 CHECK_CHARSET_GET_ID (charset, id);
34206dd2
PE
1890 CHECK_CHARACTER (ch);
1891 c = XFASTINT (ch);
3263d5a2 1892 charsetp = CHARSET_FROM_ID (id);
34206dd2 1893 code = ENCODE_CHAR (charsetp, c);
3263d5a2
KH
1894 if (code == CHARSET_INVALID_CODE (charsetp))
1895 return Qnil;
be44ca6c 1896 return INTEGER_TO_CONS (code);
beeedaad
KH
1897}
1898
beeedaad 1899
b121a744
KH
1900DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1901 doc:
1902 /* Return a character of CHARSET whose position codes are CODEn.
1903
1904CODE1 through CODE4 are optional, but if you don't supply sufficient
1905position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1906is specified. */)
5842a27b 1907 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
beeedaad 1908{
3263d5a2
KH
1909 int id, dimension;
1910 struct charset *charsetp;
b121a744
KH
1911 unsigned code;
1912 int c;
87b089ad 1913
3263d5a2
KH
1914 CHECK_CHARSET_GET_ID (charset, id);
1915 charsetp = CHARSET_FROM_ID (id);
4ed46869 1916
b121a744
KH
1917 dimension = CHARSET_DIMENSION (charsetp);
1918 if (NILP (code1))
d47073ca
KH
1919 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1920 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1921 else
859f2b3c 1922 {
b121a744
KH
1923 CHECK_NATNUM (code1);
1924 if (XFASTINT (code1) >= 0x100)
1925 args_out_of_range (make_number (0xFF), code1);
1926 code = XFASTINT (code1);
859f2b3c 1927
b0a1e45e 1928 if (dimension > 1)
beeedaad 1929 {
b121a744 1930 code <<= 8;
b0a1e45e
KH
1931 if (NILP (code2))
1932 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1933 else
b121a744 1934 {
b0a1e45e
KH
1935 CHECK_NATNUM (code2);
1936 if (XFASTINT (code2) >= 0x100)
1937 args_out_of_range (make_number (0xFF), code2);
1938 code |= XFASTINT (code2);
b121a744 1939 }
99529c2c 1940
b0a1e45e 1941 if (dimension > 2)
b121a744
KH
1942 {
1943 code <<= 8;
b0a1e45e
KH
1944 if (NILP (code3))
1945 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1946 else
1947 {
b0a1e45e
KH
1948 CHECK_NATNUM (code3);
1949 if (XFASTINT (code3) >= 0x100)
1950 args_out_of_range (make_number (0xFF), code3);
1951 code |= XFASTINT (code3);
1952 }
1953
1954 if (dimension > 3)
1955 {
1956 code <<= 8;
1957 if (NILP (code4))
1958 code |= charsetp->code_space[0];
1959 else
1960 {
1961 CHECK_NATNUM (code4);
1962 if (XFASTINT (code4) >= 0x100)
1963 args_out_of_range (make_number (0xFF), code4);
1964 code |= XFASTINT (code4);
1965 }
b121a744
KH
1966 }
1967 }
beeedaad 1968 }
859f2b3c 1969 }
beeedaad 1970
b121a744
KH
1971 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1972 code &= 0x7F7F7F7F;
1973 c = DECODE_CHAR (charsetp, code);
1974 if (c < 0)
1975 error ("Invalid code(s)");
3263d5a2 1976 return make_number (c);
4ed46869
KH
1977}
1978
beeedaad 1979
3263d5a2
KH
1980/* Return the first charset in CHARSET_LIST that contains C.
1981 CHARSET_LIST is a list of charset IDs. If it is nil, use
1982 Vcharset_ordered_list. */
beeedaad 1983
3263d5a2 1984struct charset *
971de7fb 1985char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 1986{
d5172d4f 1987 bool maybe_null = 0;
8a1816bb 1988
3263d5a2
KH
1989 if (NILP (charset_list))
1990 charset_list = Vcharset_ordered_list;
8a1816bb
KH
1991 else
1992 maybe_null = 1;
beeedaad 1993
6c652beb 1994 while (CONSP (charset_list))
2e344af3 1995 {
3263d5a2
KH
1996 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1997 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 1998
3263d5a2 1999 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2000 {
3263d5a2
KH
2001 if (code_return)
2002 *code_return = code;
2003 return charset;
3f62427c 2004 }
3263d5a2 2005 charset_list = XCDR (charset_list);
c0be27fd
KH
2006 if (! maybe_null
2007 && c <= MAX_UNICODE_CHAR
2008 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 2009 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2010 }
8a1816bb
KH
2011 return (maybe_null ? NULL
2012 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2013 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2014}
2015
2e344af3 2016
3263d5a2 2017DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2018 doc:
d0cf2d48 2019 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2020The charset is decided by the current priority order of charsets.
2021A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2022CH in the charset. */)
5842a27b 2023 (Lisp_Object ch)
4ed46869 2024{
3263d5a2
KH
2025 struct charset *charset;
2026 int c, dimension;
2027 unsigned code;
4ed46869
KH
2028 Lisp_Object val;
2029
3263d5a2
KH
2030 CHECK_CHARACTER (ch);
2031 c = XFASTINT (ch);
2032 charset = CHAR_CHARSET (c);
2033 if (! charset)
1088b922 2034 emacs_abort ();
3263d5a2
KH
2035 code = ENCODE_CHAR (charset, c);
2036 if (code == CHARSET_INVALID_CODE (charset))
1088b922 2037 emacs_abort ();
3263d5a2 2038 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2039 for (val = Qnil; dimension > 0; dimension--)
2040 {
2041 val = Fcons (make_number (code & 0xFF), val);
2042 code >>= 8;
2043 }
3263d5a2 2044 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2045}
2046
740f080d 2047
4cb75c4b
KH
2048DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2049 doc: /* Return the charset of highest priority that contains CH.
15c6d837
XF
2050ASCII characters are an exception: for them, this function always
2051returns `ascii'.
4cb75c4b
KH
2052If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2053from which to find the charset. It may also be a coding system. In
2054that case, find the charset from what supported by that coding system. */)
5842a27b 2055 (Lisp_Object ch, Lisp_Object restriction)
4ed46869 2056{
3263d5a2 2057 struct charset *charset;
4ed46869 2058
3263d5a2 2059 CHECK_CHARACTER (ch);
4cb75c4b
KH
2060 if (NILP (restriction))
2061 charset = CHAR_CHARSET (XINT (ch));
2062 else
2063 {
4cb75c4b
KH
2064 if (CONSP (restriction))
2065 {
7a84eee5
KH
2066 int c = XFASTINT (ch);
2067
2068 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2069 {
f6095868 2070 struct charset *rcharset;
4cb75c4b 2071
f6095868
PE
2072 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2073 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
7a84eee5 2074 return XCAR (restriction);
4cb75c4b 2075 }
7a84eee5 2076 return Qnil;
4cb75c4b 2077 }
7a84eee5
KH
2078 restriction = coding_system_charset_list (restriction);
2079 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2080 if (! charset)
2081 return Qnil;
2082 }
3263d5a2 2083 return (CHARSET_NAME (charset));
4ed46869
KH
2084}
2085
17e7ef1b 2086
3263d5a2
KH
2087DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2088 doc: /*
2089Return charset of a character in the current buffer at position POS.
cd1181db 2090If POS is nil, it defaults to the current point.
3263d5a2 2091If POS is out of range, the value is nil. */)
5842a27b 2092 (Lisp_Object pos)
2e344af3 2093{
3263d5a2
KH
2094 Lisp_Object ch;
2095 struct charset *charset;
046b1f03 2096
3263d5a2
KH
2097 ch = Fchar_after (pos);
2098 if (! INTEGERP (ch))
2099 return ch;
2100 charset = CHAR_CHARSET (XINT (ch));
2101 return (CHARSET_NAME (charset));
6ae1f27e 2102}
9036eb45 2103
87b089ad 2104
3263d5a2
KH
2105DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2106 doc: /*
2107Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2108
2109ISO 2022's designation sequence (escape sequence) distinguishes charsets
2110by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2111whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2112See the documentation of the function `charset-info' for the meanings of
2113DIMENSION, CHARS, and FINAL-CHAR. */)
5842a27b 2114 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
6ae1f27e 2115{
3263d5a2 2116 int id;
d5172d4f 2117 bool chars_flag;
a8a35e61 2118
3263d5a2 2119 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2120 chars_flag = XFASTINT (chars) == 96;
2121 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2122 XFASTINT (final_char));
2123 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2124}
2125
87b089ad 2126
3263d5a2
KH
2127DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2128 0, 0, 0,
2129 doc: /*
ecca2aad
KH
2130Internal use only.
2131Clear temporary charset mapping tables.
2132It should be called only from temacs invoked for dumping. */)
5842a27b 2133 (void)
87b089ad 2134{
ecca2aad 2135 if (temp_charset_work)
87b089ad 2136 {
baad03f0 2137 xfree (temp_charset_work);
ecca2aad 2138 temp_charset_work = NULL;
2e344af3 2139 }
2e344af3 2140
ecca2aad
KH
2141 if (CHAR_TABLE_P (Vchar_unify_table))
2142 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2143
3263d5a2 2144 return Qnil;
740f080d
KH
2145}
2146
8ddf5e57
DL
2147DEFUN ("charset-priority-list", Fcharset_priority_list,
2148 Scharset_priority_list, 0, 1, 0,
2149 doc: /* Return the list of charsets ordered by priority.
2150HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 2151 (Lisp_Object highestp)
2e344af3 2152{
8ddf5e57 2153 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2154
8ddf5e57 2155 if (!NILP (highestp))
16fed1fc 2156 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2157
8ddf5e57 2158 while (!NILP (list))
2e344af3 2159 {
16fed1fc 2160 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2161 list = XCDR (list);
2e344af3 2162 }
8ddf5e57 2163 return Fnreverse (val);
2e344af3
KH
2164}
2165
8ddf5e57
DL
2166DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2167 1, MANY, 0,
2168 doc: /* Assign higher priority to the charsets given as arguments.
2169usage: (set-charset-priority &rest charsets) */)
f66c7cf8 2170 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 2171{
af7c60ca 2172 Lisp_Object new_head, old_list, arglist[2];
321c819c 2173 Lisp_Object list_2022, list_emacs_mule;
f66c7cf8 2174 ptrdiff_t i;
c5101a77 2175 int id;
4ed46869 2176
8ddf5e57 2177 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2178 new_head = Qnil;
8ddf5e57 2179 for (i = 0; i < nargs; i++)
4ed46869 2180 {
8ddf5e57 2181 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2182 if (! NILP (Fmemq (make_number (id), old_list)))
2183 {
2184 old_list = Fdelq (make_number (id), old_list);
2185 new_head = Fcons (make_number (id), new_head);
2186 }
5729c92f 2187 }
8ddf5e57 2188 arglist[0] = Fnreverse (new_head);
6a9c90ec 2189 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2190 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2191 charset_ordered_list_tick++;
5729c92f 2192
6809ca75 2193 charset_unibyte = -1;
321c819c 2194 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2195 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2196 {
e77415b0 2197 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2198 list_2022 = Fcons (XCAR (old_list), list_2022);
2199 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2200 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2201 if (charset_unibyte < 0)
2202 {
2203 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2204
2205 if (CHARSET_DIMENSION (charset) == 1
2206 && CHARSET_ASCII_COMPATIBLE_P (charset)
2207 && CHARSET_MAX_CHAR (charset) >= 0x80)
2208 charset_unibyte = CHARSET_ID (charset);
2209 }
4ed46869 2210 }
321c819c
KH
2211 Viso_2022_charset_list = Fnreverse (list_2022);
2212 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2213 if (charset_unibyte < 0)
2214 charset_unibyte = charset_iso_8859_1;
4ed46869 2215
8ddf5e57 2216 return Qnil;
4ed46869
KH
2217}
2218
d5b33309
KH
2219DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2220 0, 1, 0,
2221 doc: /* Internal use only.
2222Return charset identification number of CHARSET. */)
5842a27b 2223 (Lisp_Object charset)
4ed46869 2224{
d5b33309 2225 int id;
4ed46869 2226
d5b33309
KH
2227 CHECK_CHARSET_GET_ID (charset, id);
2228 return make_number (id);
4ed46869
KH
2229}
2230
7a84eee5
KH
2231struct charset_sort_data
2232{
2233 Lisp_Object charset;
2234 int id;
0065d054 2235 ptrdiff_t priority;
7a84eee5
KH
2236};
2237
2238static int
2239charset_compare (const void *d1, const void *d2)
2240{
2241 const struct charset_sort_data *data1 = d1, *data2 = d2;
0065d054
PE
2242 if (data1->priority != data2->priority)
2243 return data1->priority < data2->priority ? -1 : 1;
2244 return 0;
7a84eee5
KH
2245}
2246
2247DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2248 doc: /* Sort charset list CHARSETS by a priority of each charset.
2249Return the sorted list. CHARSETS is modified by side effects.
2250See also `charset-priority-list' and `set-charset-priority'. */)
2251 (Lisp_Object charsets)
2252{
2253 Lisp_Object len = Flength (charsets);
0065d054
PE
2254 ptrdiff_t n = XFASTINT (len), i, j;
2255 int done;
7a84eee5
KH
2256 Lisp_Object tail, elt, attrs;
2257 struct charset_sort_data *sort_data;
726929c4 2258 int id, min_id = INT_MAX, max_id = INT_MIN;
7a84eee5
KH
2259 USE_SAFE_ALLOCA;
2260
2261 if (n == 0)
2262 return Qnil;
0065d054 2263 SAFE_NALLOCA (sort_data, 1, n);
7a84eee5
KH
2264 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2265 {
2266 elt = XCAR (tail);
2267 CHECK_CHARSET_GET_ATTR (elt, attrs);
2268 sort_data[i].charset = elt;
2269 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
726929c4 2270 if (id < min_id)
7a84eee5 2271 min_id = id;
726929c4 2272 if (id > max_id)
7a84eee5
KH
2273 max_id = id;
2274 }
2275 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2276 done < n && CONSP (tail); tail = XCDR (tail), i++)
2277 {
2278 elt = XCAR (tail);
2279 id = XFASTINT (elt);
2280 if (id >= min_id && id <= max_id)
2281 for (j = 0; j < n; j++)
2282 if (sort_data[j].id == id)
2283 {
2284 sort_data[j].priority = i;
2285 done++;
2286 }
2287 }
2288 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2289 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2290 XSETCAR (tail, sort_data[i].charset);
2291 SAFE_FREE ();
2292 return charsets;
2293}
2294
4ed46869 2295\f
3263d5a2 2296void
971de7fb 2297init_charset (void)
4ed46869 2298{
c8f94403
GM
2299 Lisp_Object tempdir;
2300 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
73dcdb9f 2301 if (! file_accessible_directory_p (SSDATA (tempdir)))
c8f94403 2302 {
63541ed8
GM
2303 /* This used to be non-fatal (dir_warning), but it should not
2304 happen, and if it does sooner or later it will cause some
16905bec 2305 obscure problem (eg bug#6401), so better abort. */
63541ed8
GM
2306 fprintf (stderr, "Error: charsets directory not found:\n\
2307%s\n\
7b9cb544 2308Emacs will not function correctly without the character map files.\n%s\
c8f94403 2309Please check your installation!\n",
7b9cb544
GM
2310 SDATA (tempdir),
2311 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2312variable is set, maybe it has the wrong value?\n" : "");
63541ed8 2313 exit (1);
c8f94403
GM
2314 }
2315
6c6f1994 2316 Vcharset_map_path = list1 (tempdir);
4ed46869
KH
2317}
2318
4ed46869 2319
dfcf069d 2320void
971de7fb 2321init_charset_once (void)
4ed46869
KH
2322{
2323 int i, j, k;
2324
3263d5a2
KH
2325 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2326 for (j = 0; j < ISO_MAX_CHARS; j++)
2327 for (k = 0; k < ISO_MAX_FINAL; k++)
2328 iso_charset_table[i][j][k] = -1;
4ed46869 2329
60383934 2330 for (i = 0; i < 256; i++)
b84ae584 2331 emacs_mule_charset[i] = -1;
4ed46869 2332
7c7dceee
KH
2333 charset_jisx0201_roman = -1;
2334 charset_jisx0208_1978 = -1;
2335 charset_jisx0208 = -1;
d32320c4 2336 charset_ksc5601 = -1;
4ed46869
KH
2337}
2338
2339#ifdef emacs
2340
f701dc2a
PE
2341/* Allocate an initial charset table that is large enough to handle
2342 Emacs while it is bootstrapping. As of September 2011, the size
2343 needs to be at least 166; make it a bit bigger to allow for future
2344 expansion.
2345
2346 Don't make the value so small that the table is reallocated during
2347 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2348 during an initial bootstrap wreak havoc after dumping; see the
2349 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2350 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2351static struct charset charset_table_init[180];
2352
dfcf069d 2353void
971de7fb 2354syms_of_charset (void)
4ed46869 2355{
3263d5a2
KH
2356 DEFSYM (Qcharsetp, "charsetp");
2357
2358 DEFSYM (Qascii, "ascii");
2359 DEFSYM (Qunicode, "unicode");
6c652beb 2360 DEFSYM (Qemacs, "emacs");
2fe1edd1 2361 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2362 DEFSYM (Qiso_8859_1, "iso-8859-1");
2363
2364 DEFSYM (Qgl, "gl");
2365 DEFSYM (Qgr, "gr");
2366
3263d5a2
KH
2367 staticpro (&Vcharset_ordered_list);
2368 Vcharset_ordered_list = Qnil;
2369
2370 staticpro (&Viso_2022_charset_list);
2371 Viso_2022_charset_list = Qnil;
2372
2373 staticpro (&Vemacs_mule_charset_list);
2374 Vemacs_mule_charset_list = Qnil;
2375
3943ed76 2376 /* Don't staticpro them here. It's done in syms_of_fns. */
088dcc3e
DN
2377 QCtest = intern_c_string (":test");
2378 Qeq = intern_c_string ("eq");
3943ed76 2379
3263d5a2 2380 staticpro (&Vcharset_hash_table);
8f924df7
KH
2381 {
2382 Lisp_Object args[2];
2383 args[0] = QCtest;
2384 args[1] = Qeq;
2385 Vcharset_hash_table = Fmake_hash_table (2, args);
2386 }
3263d5a2 2387
f701dc2a 2388 charset_table = charset_table_init;
faa52174 2389 charset_table_size = ARRAYELTS (charset_table_init);
3263d5a2
KH
2390 charset_table_used = 0;
2391
3263d5a2
KH
2392 defsubr (&Scharsetp);
2393 defsubr (&Smap_charset_chars);
2394 defsubr (&Sdefine_charset_internal);
2395 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2396 defsubr (&Scharset_plist);
2397 defsubr (&Sset_charset_plist);
2398 defsubr (&Sunify_charset);
3fac5a51 2399 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2400 defsubr (&Sdeclare_equiv_charset);
2401 defsubr (&Sfind_charset_region);
2402 defsubr (&Sfind_charset_string);
3263d5a2
KH
2403 defsubr (&Sdecode_char);
2404 defsubr (&Sencode_char);
4ed46869 2405 defsubr (&Ssplit_char);
3263d5a2 2406 defsubr (&Smake_char);
4ed46869 2407 defsubr (&Schar_charset);
90d7b74e 2408 defsubr (&Scharset_after);
4ed46869 2409 defsubr (&Siso_charset);
3263d5a2 2410 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2411 defsubr (&Scharset_priority_list);
2412 defsubr (&Sset_charset_priority);
d5b33309 2413 defsubr (&Scharset_id_internal);
7a84eee5 2414 defsubr (&Ssort_charsets);
3263d5a2 2415
29208e82 2416 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
fb7ada5f 2417 doc: /* List of directories to search for charset map files. */);
4beef065 2418 Vcharset_map_path = Qnil;
4ed46869 2419
29208e82 2420 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
ecca2aad
KH
2421 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2422 inhibit_load_charset_map = 0;
2423
29208e82 2424 DEFVAR_LISP ("charset-list", Vcharset_list,
528623a0 2425 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2426 Vcharset_list = Qnil;
2427
29208e82 2428 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
6a9c90ec
KH
2429 doc: /* ISO639 language mnemonic symbol for the current language environment.
2430If the current language environment is for multiple languages (e.g. "Latin-1"),
2431the value may be a list of mnemonics. */);
2432 Vcurrent_iso639_language = Qnil;
2433
2fe1edd1 2434 charset_ascii
12455b2f 2435 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2fe1edd1 2436 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523 2437 charset_iso_8859_1
12455b2f 2438 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
14e3d523 2439 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2440 charset_unicode
12455b2f 2441 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2fe1edd1 2442 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb 2443 charset_emacs
12455b2f 2444 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
6c652beb 2445 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1 2446 charset_eight_bit
12455b2f 2447 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
880820fe 2448 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2449 MAX_5_BYTE_CHAR + 1);
6809ca75 2450 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2451}
2452
2453#endif /* emacs */