remove `declare' macro
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
4ed46869 2
ba318903 3Copyright (C) 2001-2014 Free Software Foundation, Inc.
1f41ee56
GM
4
5Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
9
10Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
4ed46869 13
369314dc
KH
14This file is part of GNU Emacs.
15
9ec0b715 16GNU Emacs is free software: you can redistribute it and/or modify
369314dc 17it under the terms of the GNU General Public License as published by
9ec0b715
GM
18the Free Software Foundation, either version 3 of the License, or
19(at your option) any later version.
4ed46869 20
369314dc
KH
21GNU Emacs is distributed in the hope that it will be useful,
22but WITHOUT ANY WARRANTY; without even the implied warranty of
23MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24GNU General Public License for more details.
4ed46869 25
369314dc 26You should have received a copy of the GNU General Public License
9ec0b715 27along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 28
68c45bf0 29#include <config.h>
68c45bf0 30
f4b1eb36 31#include <errno.h>
4ed46869 32#include <stdio.h>
3263d5a2 33#include <unistd.h>
726929c4 34#include <limits.h>
4ed46869 35#include <sys/types.h>
620f13b0 36#include <c-ctype.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869 43
04c2f2c5 44/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 45
3263d5a2 46 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 47 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
4ed46869 51
3263d5a2
KH
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
04c2f2c5 55 charset_table as a struct charset.
4ed46869 56
3263d5a2 57*/
4ed46869 58
3263d5a2
KH
59/* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61Lisp_Object Vcharset_hash_table;
4ed46869 62
3263d5a2
KH
63/* Table of struct charset. */
64struct charset *charset_table;
4ed46869 65
0065d054 66static ptrdiff_t charset_table_size;
5af5dd92 67static int charset_table_used;
4ed46869 68
3263d5a2 69Lisp_Object Qcharsetp;
4ed46869 70
3263d5a2
KH
71/* Special charset symbols. */
72Lisp_Object Qascii;
955cbe7b
PE
73static Lisp_Object Qeight_bit;
74static Lisp_Object Qiso_8859_1;
75static Lisp_Object Qunicode;
76static Lisp_Object Qemacs;
4ed46869 77
3263d5a2
KH
78/* The corresponding charsets. */
79int charset_ascii;
2fe1edd1 80int charset_eight_bit;
a2cb4e63 81static int charset_iso_8859_1;
3263d5a2 82int charset_unicode;
a2cb4e63 83static int charset_emacs;
b0e3cf2b 84
7c7dceee
KH
85/* The other special charsets. */
86int charset_jisx0201_roman;
87int charset_jisx0208_1978;
88int charset_jisx0208;
d32320c4 89int charset_ksc5601;
c1a08b4c 90
3263d5a2 91/* Value of charset attribute `charset-iso-plane'. */
955cbe7b 92static Lisp_Object Qgl, Qgr;
c1a08b4c 93
d1a04588
KH
94/* Charset of unibyte characters. */
95int charset_unibyte;
4ed46869 96
3263d5a2
KH
97/* List of charsets ordered by the priority. */
98Lisp_Object Vcharset_ordered_list;
4ed46869 99
6a9c90ec
KH
100/* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102Lisp_Object Vcharset_non_preferred_head;
103
dbbb237d 104/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 105 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
106 -1. */
107unsigned short charset_ordered_list_tick;
4ed46869 108
3263d5a2
KH
109/* List of iso-2022 charsets. */
110Lisp_Object Viso_2022_charset_list;
35e623fb 111
3263d5a2
KH
112/* List of emacs-mule charsets. */
113Lisp_Object Vemacs_mule_charset_list;
114
b84ae584 115int emacs_mule_charset[256];
4ed46869
KH
116
117/* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
119int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
69f8de5b
KH
121#define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
3c7649c1 123 ? (int) ((code) - (charset)->min_code) \
69f8de5b
KH
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
3c7649c1
PE
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
3263d5a2
KH
136 : -1)
137
138
3c7649c1
PE
139/* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
3263d5a2 142
820ee249
KH
143#define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
150 << 8) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
153 << 16) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
155 << 24))))
4cf9710d 156
ecca2aad
KH
157/* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
8a73a704 159
ecca2aad
KH
160static struct
161{
162 /* The current charset for which the following tables are setup. */
163 struct charset *current;
164
165 /* 1 iff the following table is used for encoder. */
166 short for_encoder;
167
0b381c7e
JB
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
ecca2aad
KH
170 int min_char, max_char;
171
e4769531 172 /* A Unicode character corresponding to the code index 0 (i.e. the
ecca2aad 173 minimum code-point) of the current charset, or -1 if the code
e4769531 174 index 0 is not a Unicode character. This is checked when
ecca2aad
KH
175 table.encoder[CHAR] is zero. */
176 int zero_index_char;
177
178 union {
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder[0x20000];
189 } table;
190} *temp_charset_work;
191
192#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
193 do { \
194 if ((CODE) == 0) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
198 else \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
200 } while (0)
201
202#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
208
209#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
211
212#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
046b1f03 214\f
93bcb785 215
e9ce014c
KH
216/* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
d5172d4f 218bool charset_map_loaded;
4ed46869 219
e9ce014c 220struct charset_map_entries
4ed46869 221{
e9ce014c
KH
222 struct {
223 unsigned from, to;
224 int c;
d51c0634 225 } *entry; /* [0x10000] */
e9ce014c
KH
226 struct charset_map_entries *next;
227};
228
ecca2aad
KH
229/* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
232
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
235
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
4cf9710d 238
ecca2aad
KH
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 244
ecca2aad 245 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 246
ecca2aad
KH
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
252*/
4ed46869 253
3263d5a2 254static void
971de7fb 255load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 256{
89ef49df 257 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
3263d5a2 258 unsigned max_code = CHARSET_MAX_CODE (charset);
d5172d4f 259 bool ascii_compatible_p = charset->ascii_compatible_p;
3263d5a2 260 int min_char, max_char, nonascii_min_char;
3263d5a2 261 int i;
3263d5a2 262 unsigned char *fast_map = charset->fast_map;
99529c2c 263
e9ce014c
KH
264 if (n_entries <= 0)
265 return;
266
ecca2aad 267 if (control_flag)
8ac5a9cc 268 {
ecca2aad
KH
269 if (! inhibit_load_charset_map)
270 {
271 if (control_flag == 1)
272 {
273 if (charset->method == CHARSET_METHOD_MAP)
274 {
275 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 276
4939150c
PE
277 vec = Fmake_vector (make_number (n), make_number (-1));
278 set_charset_attr (charset, charset_decoder, vec);
ecca2aad
KH
279 }
280 else
281 {
282 char_table_set_range (Vchar_unify_table,
283 charset->min_char, charset->max_char,
284 Qnil);
285 }
286 }
287 else
288 {
289 table = Fmake_char_table (Qnil, Qnil);
4939150c
PE
290 set_charset_attr (charset,
291 (charset->method == CHARSET_METHOD_MAP
292 ? charset_encoder : charset_deunifier),
293 table);
ecca2aad
KH
294 }
295 }
296 else
297 {
298 if (! temp_charset_work)
38182d90 299 temp_charset_work = xmalloc (sizeof *temp_charset_work);
ecca2aad
KH
300 if (control_flag == 1)
301 {
302 memset (temp_charset_work->table.decoder, -1,
303 sizeof (int) * 0x10000);
ecca2aad
KH
304 }
305 else
306 {
307 memset (temp_charset_work->table.encoder, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work->zero_index_char = -1;
310 }
311 temp_charset_work->current = charset;
312 temp_charset_work->for_encoder = (control_flag == 2);
313 control_flag += 2;
314 }
3263d5a2 315 charset_map_loaded = 1;
2e344af3 316 }
6662e69b 317
e9ce014c 318 min_char = max_char = entries->entry[0].c;
3263d5a2 319 nonascii_min_char = MAX_CHAR;
e9ce014c 320 for (i = 0; i < n_entries; i++)
2e344af3 321 {
e9ce014c 322 unsigned from, to;
3befa583 323 int from_index, to_index, lim_index;
3b4f4446 324 int from_c, to_c;
e9ce014c 325 int idx = i % 0x10000;
3263d5a2 326
e9ce014c
KH
327 if (i > 0 && idx == 0)
328 entries = entries->next;
329 from = entries->entry[idx].from;
330 to = entries->entry[idx].to;
3b4f4446
KH
331 from_c = entries->entry[idx].c;
332 from_index = CODE_POINT_TO_INDEX (charset, from);
333 if (from == to)
6662e69b 334 {
3b4f4446
KH
335 to_index = from_index;
336 to_c = from_c;
6662e69b 337 }
3b4f4446 338 else
6662e69b 339 {
3b4f4446
KH
340 to_index = CODE_POINT_TO_INDEX (charset, to);
341 to_c = from_c + (to_index - from_index);
6662e69b 342 }
3b4f4446
KH
343 if (from_index < 0 || to_index < 0)
344 continue;
3befa583 345 lim_index = to_index + 1;
3263d5a2 346
ecca2aad
KH
347 if (to_c > max_char)
348 max_char = to_c;
349 else if (from_c < min_char)
350 min_char = from_c;
6662e69b 351
ecca2aad
KH
352 if (control_flag == 1)
353 {
354 if (charset->method == CHARSET_METHOD_MAP)
3befa583 355 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
356 ASET (vec, from_index, make_number (from_c));
357 else
3befa583 358 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
359 CHAR_TABLE_SET (Vchar_unify_table,
360 CHARSET_CODE_OFFSET (charset) + from_index,
361 make_number (from_c));
362 }
363 else if (control_flag == 2)
364 {
365 if (charset->method == CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset))
3befa583 367 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad 368 {
3c7649c1
PE
369 unsigned code = from_index;
370 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad
KH
371
372 if (NILP (CHAR_TABLE_REF (table, from_c)))
373 CHAR_TABLE_SET (table, from_c, make_number (code));
374 }
375 else
3befa583 376 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
377 {
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (from_index));
380 }
381 }
382 else if (control_flag == 3)
3befa583 383 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
384 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
385 else if (control_flag == 4)
3befa583 386 for (; from_index < lim_index; from_index++, from_c++)
ecca2aad
KH
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
388 else /* control_flag == 0 */
389 {
3b4f4446
KH
390 if (ascii_compatible_p)
391 {
200fc949 392 if (! ASCII_CHAR_P (from_c))
3b4f4446
KH
393 {
394 if (from_c < nonascii_min_char)
395 nonascii_min_char = from_c;
396 }
200fc949 397 else if (! ASCII_CHAR_P (to_c))
3b4f4446
KH
398 {
399 nonascii_min_char = 0x80;
400 }
401 }
177c0ea7 402
ecca2aad
KH
403 for (; from_c <= to_c; from_c++)
404 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 405 }
8ac5a9cc 406 }
3263d5a2 407
ecca2aad 408 if (control_flag == 0)
4ed46869 409 {
3263d5a2
KH
410 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
411 ? nonascii_min_char : min_char);
412 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 413 }
ecca2aad
KH
414 else if (control_flag == 4)
415 {
416 temp_charset_work->min_char = min_char;
417 temp_charset_work->max_char = max_char;
418 }
4ed46869
KH
419}
420
12bcae05 421
3263d5a2 422/* Read a hexadecimal number (preceded by "0x") from the file FP while
3b59c351 423 paying attention to comment character '#'. */
12bcae05 424
b0ab8123 425static unsigned
d5172d4f 426read_hex (FILE *fp, bool *eof, bool *overflow)
12bcae05 427{
3263d5a2
KH
428 int c;
429 unsigned n;
12bcae05 430
3263d5a2
KH
431 while ((c = getc (fp)) != EOF)
432 {
69f8de5b 433 if (c == '#')
3263d5a2
KH
434 {
435 while ((c = getc (fp)) != EOF && c != '\n');
436 }
437 else if (c == '0')
438 {
439 if ((c = getc (fp)) == EOF || c == 'x')
440 break;
441 }
8f924df7 442 }
3263d5a2
KH
443 if (c == EOF)
444 {
445 *eof = 1;
446 return 0;
447 }
3263d5a2 448 n = 0;
620f13b0 449 while (c_isxdigit (c = getc (fp)))
caeeedc1
PE
450 {
451 if (UINT_MAX >> 4 < n)
452 *overflow = 1;
3263d5a2 453 n = ((n << 4)
caeeedc1
PE
454 | (c - ('0' <= c && c <= '9' ? '0'
455 : 'A' <= c && c <= 'F' ? 'A' - 10
456 : 'a' - 10)));
457 }
e9ce014c
KH
458 if (c != EOF)
459 ungetc (c, fp);
3263d5a2
KH
460 return n;
461}
12bcae05 462
3263d5a2 463/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
464 Each line of MAPFILE has this form
465 0xAAAA 0xCCCC
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
468 0xAAAA-0xBBBB 0xCCCC
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
4ed46869 471
3263d5a2
KH
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 474 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
475 range.
476
18a10a21
JB
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 479
e9ce014c 480static void
f4b1eb36
PE
481load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
482 int control_flag)
4ed46869 483{
e9ce014c
KH
484 unsigned min_code = CHARSET_MIN_CODE (charset);
485 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2 486 int fd;
5d1922cd 487 FILE *fp = NULL;
3263d5a2 488 Lisp_Object suffixes;
e9ce014c 489 struct charset_map_entries *head, *entries;
d311d28c
PE
490 int n_entries;
491 ptrdiff_t count;
4ed46869 492
6c6f1994 493 suffixes = list2 (build_string (".map"), build_string (".TXT"));
4ed46869 494
2bfa3d3e 495 dynwind_begin ();
5d1922cd 496 record_unwind_protect_ptr (fclose_ptr_unwind, &fp);
9f62b5dd 497 {
2bfa3d3e 498 dynwind_begin ();
9f62b5dd
BT
499 specbind (Qfile_name_handler_alist, Qnil);
500 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
501 fp = fd < 0 ? 0 : fdopen (fd, "r");
502 if (!fp)
503 {
504 int open_errno = errno;
505 emacs_close (fd);
506 report_file_errno ("Loading charset map", mapfile, open_errno);
507 }
2bfa3d3e 508 dynwind_end ();
9f62b5dd 509 }
4ed46869 510
e96be6dd
BT
511 /* Use record, as `charset_map_entries' is large (larger than
512 MAX_ALLOCA). */
513 head = xmalloc (sizeof *head);
d51c0634 514 head->entry = xmalloc_atomic (0x10000 * sizeof (*head->entry));
a2f3eb19
CY
515 entries = head;
516
e9ce014c 517 n_entries = 0;
3263d5a2
KH
518 while (1)
519 {
caeeedc1 520 unsigned from, to, c;
e9ce014c 521 int idx;
d5172d4f 522 bool eof = 0, overflow = 0;
4ed46869 523
caeeedc1 524 from = read_hex (fp, &eof, &overflow);
3263d5a2
KH
525 if (eof)
526 break;
e9ce014c 527 if (getc (fp) == '-')
caeeedc1 528 to = read_hex (fp, &eof, &overflow);
e9ce014c
KH
529 else
530 to = from;
caeeedc1
PE
531 if (eof)
532 break;
533 c = read_hex (fp, &eof, &overflow);
534 if (eof)
535 break;
ac4137cc 536
caeeedc1
PE
537 if (overflow)
538 continue;
e9ce014c
KH
539 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
540 continue;
ac4137cc 541
f4b1eb36 542 if (n_entries == 0x10000)
3263d5a2 543 {
e96be6dd 544 entries->next = xmalloc (sizeof *entries->next);
d51c0634 545 entries->next->entry = xmalloc_atomic (0x10000 * (sizeof *entries->next));
e9ce014c 546 entries = entries->next;
d311d28c 547 n_entries = 0;
3263d5a2 548 }
d311d28c 549 idx = n_entries;
e9ce014c
KH
550 entries->entry[idx].from = from;
551 entries->entry[idx].to = to;
552 entries->entry[idx].c = c;
553 n_entries++;
3263d5a2
KH
554 }
555 fclose (fp);
5d1922cd 556 fp = NULL;
177c0ea7 557
e9ce014c 558 load_charset_map (charset, head, n_entries, control_flag);
2bfa3d3e 559 dynwind_end ();
4ed46869
KH
560}
561
e9ce014c 562static void
971de7fb 563load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 564{
e9ce014c
KH
565 unsigned min_code = CHARSET_MIN_CODE (charset);
566 unsigned max_code = CHARSET_MAX_CODE (charset);
567 struct charset_map_entries *head, *entries;
568 int n_entries;
569 int len = ASIZE (vec);
570 int i;
a2f3eb19 571 USE_SAFE_ALLOCA;
23d2a7f1 572
e9ce014c 573 if (len % 2 == 1)
3263d5a2 574 {
e9ce014c
KH
575 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
576 return;
3263d5a2 577 }
35e623fb 578
a2f3eb19
CY
579 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
580 large (larger than MAX_ALLOCA). */
98c6f1e3 581 head = SAFE_ALLOCA (sizeof *head);
d51c0634 582 head->entry = xmalloc_atomic (0x10000 * (sizeof *head->entry));
a2f3eb19 583 entries = head;
72af86bd 584 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 585
e9ce014c
KH
586 n_entries = 0;
587 for (i = 0; i < len; i += 2)
35e623fb 588 {
e9ce014c
KH
589 Lisp_Object val, val2;
590 unsigned from, to;
d311d28c 591 EMACS_INT c;
e9ce014c 592 int idx;
d2665018 593
e9ce014c
KH
594 val = AREF (vec, i);
595 if (CONSP (val))
bbf12bb3 596 {
e9ce014c
KH
597 val2 = XCDR (val);
598 val = XCAR (val);
e9ce014c
KH
599 from = XFASTINT (val);
600 to = XFASTINT (val2);
bbf12bb3 601 }
e9ce014c 602 else
d311d28c 603 from = to = XFASTINT (val);
e9ce014c
KH
604 val = AREF (vec, i + 1);
605 CHECK_NATNUM (val);
606 c = XFASTINT (val);
76d7b829 607
e9ce014c
KH
608 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
609 continue;
76d7b829 610
dcc694d7 611 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 612 {
98c6f1e3 613 entries->next = SAFE_ALLOCA (sizeof *entries->next);
e9ce014c 614 entries = entries->next;
72af86bd 615 memset (entries, 0, sizeof (struct charset_map_entries));
e9ce014c
KH
616 }
617 idx = n_entries % 0x10000;
618 entries->entry[idx].from = from;
619 entries->entry[idx].to = to;
620 entries->entry[idx].c = c;
621 n_entries++;
622 }
76d7b829 623
e9ce014c 624 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 625 SAFE_FREE ();
ac4137cc
KH
626}
627
ecca2aad
KH
628
629/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
630 map it is (see the comment of load_charset_map for the detail). */
631
3263d5a2 632static void
971de7fb 633load_charset (struct charset *charset, int control_flag)
76d7b829 634{
ecca2aad 635 Lisp_Object map;
76d7b829 636
ecca2aad
KH
637 if (inhibit_load_charset_map
638 && temp_charset_work
639 && charset == temp_charset_work->current
56f00ed2 640 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
641 return;
642
643 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
644 map = CHARSET_MAP (charset);
0ac2c299
PE
645 else
646 {
647 if (! CHARSET_UNIFIED_P (charset))
1088b922 648 emacs_abort ();
0ac2c299
PE
649 map = CHARSET_UNIFY_MAP (charset);
650 }
ecca2aad
KH
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 655}
76d7b829 656
3263d5a2
KH
657
658DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
659 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
5842a27b 660 (Lisp_Object object)
23d2a7f1 661{
3263d5a2 662 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
663}
664
4ed46869 665
a2cb4e63
PE
666static void
667map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned int from, unsigned int to)
ecca2aad
KH
670{
671 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
672 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
673 Lisp_Object range;
674 int c, stop;
675 struct gcpro gcpro1;
676
677 range = Fcons (Qnil, Qnil);
678 GCPRO1 (range);
679
680 c = temp_charset_work->min_char;
681 stop = (temp_charset_work->max_char < 0x20000
682 ? temp_charset_work->max_char : 0xFFFF);
51b59d79 683
ecca2aad
KH
684 while (1)
685 {
f6095868 686 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
ecca2aad 687
f6095868 688 if (idx >= from_idx && idx <= to_idx)
ecca2aad
KH
689 {
690 if (NILP (XCAR (range)))
691 XSETCAR (range, make_number (c));
692 }
693 else if (! NILP (XCAR (range)))
694 {
695 XSETCDR (range, make_number (c - 1));
696 if (c_function)
697 (*c_function) (arg, range);
698 else
699 call2 (function, range, arg);
700 XSETCAR (range, Qnil);
701 }
702 if (c == stop)
703 {
704 if (c == temp_charset_work->max_char)
705 {
706 if (! NILP (XCAR (range)))
707 {
708 XSETCDR (range, make_number (c));
709 if (c_function)
710 (*c_function) (arg, range);
711 else
712 call2 (function, range, arg);
713 }
714 break;
715 }
716 c = 0x1FFFF;
717 stop = temp_charset_work->max_char;
718 }
719 c++;
720 }
c542407d 721 UNGCPRO;
ecca2aad
KH
722}
723
4ed46869 724void
6f704c76
DN
725map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
726 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
4ed46869 727{
3263d5a2 728 Lisp_Object range;
d5172d4f
PE
729 bool partial = (from > CHARSET_MIN_CODE (charset)
730 || to < CHARSET_MAX_CODE (charset));
374c5cfd 731
3263d5a2 732 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 733 {
374c5cfd
KH
734 int from_idx = CODE_POINT_TO_INDEX (charset, from);
735 int to_idx = CODE_POINT_TO_INDEX (charset, to);
736 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
737 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
738
ecca2aad
KH
739 if (CHARSET_UNIFIED_P (charset))
740 {
741 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
742 load_charset (charset, 2);
743 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
744 map_char_table_for_charset (c_function, function,
745 CHARSET_DEUNIFIER (charset), arg,
746 partial ? charset : NULL, from, to);
747 else
748 map_charset_for_dump (c_function, function, arg, from, to);
749 }
750
374c5cfd 751 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 752 if (NILP (function))
5af5dd92 753 (*c_function) (arg, range);
3263d5a2
KH
754 else
755 call2 (function, range, arg);
c83ef371 756 }
3263d5a2
KH
757 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
758 {
759 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
760 load_charset (charset, 2);
761 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
762 map_char_table_for_charset (c_function, function,
763 CHARSET_ENCODER (charset), arg,
764 partial ? charset : NULL, from, to);
765 else
766 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 767 }
374c5cfd 768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 769 {
374c5cfd
KH
770 Lisp_Object subset_info;
771 int offset;
772
773 subset_info = CHARSET_SUBSET (charset);
774 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
775 offset = XINT (AREF (subset_info, 3));
776 from -= offset;
777 if (from < XFASTINT (AREF (subset_info, 1)))
778 from = XFASTINT (AREF (subset_info, 1));
779 to -= offset;
780 if (to > XFASTINT (AREF (subset_info, 2)))
781 to = XFASTINT (AREF (subset_info, 2));
782 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 783 }
374c5cfd
KH
784 else /* i.e. CHARSET_METHOD_SUPERSET */
785 {
786 Lisp_Object parents;
4ed46869 787
374c5cfd
KH
788 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
789 parents = XCDR (parents))
bbf12bb3 790 {
374c5cfd
KH
791 int offset;
792 unsigned this_from, this_to;
793
794 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
795 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
796 this_from = from > offset ? from - offset : 0;
797 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
798 if (this_from < CHARSET_MIN_CODE (charset))
799 this_from = CHARSET_MIN_CODE (charset);
800 if (this_to > CHARSET_MAX_CODE (charset))
801 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
802 map_charset_chars (c_function, function, arg, charset,
803 this_from, this_to);
bbf12bb3 804 }
35e623fb 805 }
4ed46869
KH
806}
807
374c5cfd 808DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 809 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 810FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 811argument ARG.
4ed46869 812
374c5cfd
KH
813RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
814characters contained in CHARSET.
4ed46869 815
374c5cfd 816The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 817range of code points (in CHARSET) of target characters. */)
5842a27b 818 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
4ed46869 819{
374c5cfd 820 struct charset *cs;
16fed1fc 821 unsigned from, to;
4ed46869 822
374c5cfd
KH
823 CHECK_CHARSET_GET_CHARSET (charset, cs);
824 if (NILP (from_code))
16fed1fc 825 from = CHARSET_MIN_CODE (cs);
970b7474 826 else
4ed46869 827 {
970b7474
KH
828 from = XINT (from_code);
829 if (from < CHARSET_MIN_CODE (cs))
830 from = CHARSET_MIN_CODE (cs);
4ed46869 831 }
374c5cfd 832 if (NILP (to_code))
970b7474 833 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
834 else
835 {
970b7474
KH
836 to = XINT (to_code);
837 if (to > CHARSET_MAX_CODE (cs))
838 to = CHARSET_MAX_CODE (cs);
4ed46869 839 }
16fed1fc 840 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 841 return Qnil;
35e623fb 842}
4ed46869 843
4ed46869 844
3263d5a2
KH
845/* Define a charset according to the arguments. The Nth argument is
846 the Nth attribute of the charset (the last attribute `charset-id'
847 is not included). See the docstring of `define-charset' for the
848 detail. */
4ed46869 849
3263d5a2
KH
850DEFUN ("define-charset-internal", Fdefine_charset_internal,
851 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
852 doc: /* For internal use only.
853usage: (define-charset-internal ...) */)
f66c7cf8 854 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 855{
3263d5a2
KH
856 /* Charset attr vector. */
857 Lisp_Object attrs;
858 Lisp_Object val;
0de4bb68 859 EMACS_UINT hash_code;
3263d5a2 860 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 861 int i, j;
3263d5a2
KH
862 struct charset charset;
863 int id;
864 int dimension;
d5172d4f 865 bool new_definition_p;
3263d5a2
KH
866 int nchars;
867
868 if (nargs != charset_arg_max)
869 return Fsignal (Qwrong_number_of_arguments,
870 Fcons (intern ("define-charset-internal"),
871 make_number (nargs)));
872
873 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
874
875 CHECK_SYMBOL (args[charset_arg_name]);
876 ASET (attrs, charset_name, args[charset_arg_name]);
877
878 val = args[charset_arg_code_space];
c032b5f8 879 for (i = 0, dimension = 0, nchars = 1; ; i++)
76d7b829 880 {
d311d28c 881 Lisp_Object min_byte_obj, max_byte_obj;
3263d5a2
KH
882 int min_byte, max_byte;
883
d311d28c
PE
884 min_byte_obj = Faref (val, make_number (i * 2));
885 max_byte_obj = Faref (val, make_number (i * 2 + 1));
af5a5a98 886 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
d311d28c 887 min_byte = XINT (min_byte_obj);
af5a5a98 888 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
d311d28c 889 max_byte = XINT (max_byte_obj);
3263d5a2
KH
890 charset.code_space[i * 4] = min_byte;
891 charset.code_space[i * 4 + 1] = max_byte;
892 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
3263d5a2
KH
893 if (max_byte > 0)
894 dimension = i + 1;
c032b5f8
PE
895 if (i == 3)
896 break;
897 nchars *= charset.code_space[i * 4 + 2];
898 charset.code_space[i * 4 + 3] = nchars;
3263d5a2 899 }
4ed46869 900
3263d5a2
KH
901 val = args[charset_arg_dimension];
902 if (NILP (val))
903 charset.dimension = dimension;
904 else
4ed46869 905 {
af5a5a98 906 CHECK_RANGED_INTEGER (val, 1, 4);
3263d5a2 907 charset.dimension = XINT (val);
4ed46869
KH
908 }
909
3263d5a2
KH
910 charset.code_linear_p
911 = (charset.dimension == 1
912 || (charset.code_space[2] == 256
913 && (charset.dimension == 2
914 || (charset.code_space[6] == 256
915 && (charset.dimension == 3
916 || charset.code_space[10] == 256)))));
917
69f8de5b 918 if (! charset.code_linear_p)
4ed46869 919 {
c1ade6f7 920 charset.code_space_mask = xzalloc_atomic (256);
69f8de5b
KH
921 for (i = 0; i < 4; i++)
922 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
923 j++)
924 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
925 }
926
3263d5a2 927 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 928
3263d5a2
KH
929 charset.min_code = (charset.code_space[0]
930 | (charset.code_space[4] << 8)
931 | (charset.code_space[8] << 16)
3c7649c1 932 | ((unsigned) charset.code_space[12] << 24));
3263d5a2
KH
933 charset.max_code = (charset.code_space[1]
934 | (charset.code_space[5] << 8)
935 | (charset.code_space[9] << 16)
3c7649c1 936 | ((unsigned) charset.code_space[13] << 24));
820ee249 937 charset.char_index_offset = 0;
8a73a704 938
820ee249
KH
939 val = args[charset_arg_min_code];
940 if (! NILP (val))
941 {
be44ca6c 942 unsigned code = cons_to_unsigned (val, UINT_MAX);
fdb82f93 943
820ee249
KH
944 if (code < charset.min_code
945 || code > charset.max_code)
3c7649c1
PE
946 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
947 make_fixnum_or_float (charset.max_code), val);
820ee249
KH
948 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
949 charset.min_code = code;
950 }
3fac5a51 951
820ee249
KH
952 val = args[charset_arg_max_code];
953 if (! NILP (val))
3fac5a51 954 {
be44ca6c 955 unsigned code = cons_to_unsigned (val, UINT_MAX);
820ee249 956
820ee249
KH
957 if (code < charset.min_code
958 || code > charset.max_code)
3c7649c1
PE
959 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
960 make_fixnum_or_float (charset.max_code), val);
820ee249 961 charset.max_code = code;
3fac5a51 962 }
3fac5a51 963
ecca2aad 964 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 965
3263d5a2
KH
966 val = args[charset_arg_invalid_code];
967 if (NILP (val))
968 {
969 if (charset.min_code > 0)
970 charset.invalid_code = 0;
bbf12bb3
KH
971 else
972 {
3c7649c1 973 if (charset.max_code < UINT_MAX)
3263d5a2
KH
974 charset.invalid_code = charset.max_code + 1;
975 else
976 error ("Attribute :invalid-code must be specified");
76d7b829 977 }
76d7b829 978 }
3263d5a2 979 else
3c7649c1 980 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
4ed46869 981
3263d5a2
KH
982 val = args[charset_arg_iso_final];
983 if (NILP (val))
984 charset.iso_final = -1;
985 else
986 {
987 CHECK_NUMBER (val);
988 if (XINT (val) < '0' || XINT (val) > 127)
c2982e87 989 error ("Invalid iso-final-char: %"pI"d", XINT (val));
3263d5a2
KH
990 charset.iso_final = XINT (val);
991 }
4ed46869 992
3263d5a2
KH
993 val = args[charset_arg_iso_revision];
994 if (NILP (val))
995 charset.iso_revision = -1;
996 else
4ed46869 997 {
af5a5a98 998 CHECK_RANGED_INTEGER (val, -1, 63);
3263d5a2 999 charset.iso_revision = XINT (val);
4ed46869 1000 }
4ed46869 1001
3263d5a2
KH
1002 val = args[charset_arg_emacs_mule_id];
1003 if (NILP (val))
1004 charset.emacs_mule_id = -1;
4ed46869
KH
1005 else
1006 {
3263d5a2
KH
1007 CHECK_NATNUM (val);
1008 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
c2982e87 1009 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
3263d5a2 1010 charset.emacs_mule_id = XINT (val);
c83ef371 1011 }
f6302ac9 1012
3263d5a2 1013 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1014
3263d5a2 1015 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1016
3263d5a2
KH
1017 charset.unified_p = 0;
1018
72af86bd 1019 memset (charset.fast_map, 0, sizeof (charset.fast_map));
3263d5a2
KH
1020
1021 if (! NILP (args[charset_arg_code_offset]))
1022 {
1023 val = args[charset_arg_code_offset];
3c7649c1 1024 CHECK_CHARACTER (val);
3263d5a2
KH
1025
1026 charset.method = CHARSET_METHOD_OFFSET;
1027 charset.code_offset = XINT (val);
1028
3263d5a2 1029 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
3c7649c1 1030 if (MAX_CHAR - charset.code_offset < i)
3263d5a2 1031 error ("Unsupported max char: %d", charset.max_char);
3c7649c1
PE
1032 charset.max_char = i + charset.code_offset;
1033 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1034 charset.min_char = i + charset.code_offset;
3263d5a2 1035
f148205f
KH
1036 i = (charset.min_char >> 7) << 7;
1037 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1038 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1039 i = (i >> 12) << 12;
3263d5a2
KH
1040 for (; i <= charset.max_char; i += 0x1000)
1041 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1042 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1043 charset.ascii_compatible_p = 1;
3263d5a2
KH
1044 }
1045 else if (! NILP (args[charset_arg_map]))
1046 {
1047 val = args[charset_arg_map];
1048 ASET (attrs, charset_map, val);
ecca2aad 1049 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1050 }
374c5cfd 1051 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1052 {
374c5cfd
KH
1053 Lisp_Object parent;
1054 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1055 struct charset *parent_charset;
1056
1057 val = args[charset_arg_subset];
1058 parent = Fcar (val);
1059 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1060 parent_min_code = Fnth (make_number (1), val);
1061 CHECK_NATNUM (parent_min_code);
1062 parent_max_code = Fnth (make_number (2), val);
1063 CHECK_NATNUM (parent_max_code);
1064 parent_code_offset = Fnth (make_number (3), val);
1065 CHECK_NUMBER (parent_code_offset);
25721f5b 1066 val = make_uninit_vector (4);
374c5cfd
KH
1067 ASET (val, 0, make_number (parent_charset->id));
1068 ASET (val, 1, parent_min_code);
1069 ASET (val, 2, parent_max_code);
1070 ASET (val, 3, parent_code_offset);
1071 ASET (attrs, charset_subset, val);
1072
1073 charset.method = CHARSET_METHOD_SUBSET;
1074 /* Here, we just copy the parent's fast_map. It's not accurate,
1075 but at least it works for quickly detecting which character
1076 DOESN'T belong to this charset. */
1077 for (i = 0; i < 190; i++)
1078 charset.fast_map[i] = parent_charset->fast_map[i];
1079
1080 /* We also copy these for parents. */
1081 charset.min_char = parent_charset->min_char;
1082 charset.max_char = parent_charset->max_char;
1083 }
1084 else if (! NILP (args[charset_arg_superset]))
0282eb69 1085 {
374c5cfd
KH
1086 val = args[charset_arg_superset];
1087 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1088 val = Fcopy_sequence (val);
374c5cfd 1089 ASET (attrs, charset_superset, val);
3263d5a2
KH
1090
1091 charset.min_char = MAX_CHAR;
1092 charset.max_char = 0;
1093 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1094 {
3263d5a2
KH
1095 Lisp_Object elt, car_part, cdr_part;
1096 int this_id, offset;
1097 struct charset *this_charset;
2e344af3 1098
3263d5a2
KH
1099 elt = Fcar (val);
1100 if (CONSP (elt))
2e344af3 1101 {
3263d5a2
KH
1102 car_part = XCAR (elt);
1103 cdr_part = XCDR (elt);
1104 CHECK_CHARSET_GET_ID (car_part, this_id);
d311d28c 1105 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
3263d5a2 1106 offset = XINT (cdr_part);
177c0ea7 1107 }
3263d5a2 1108 else
4ed46869 1109 {
3263d5a2
KH
1110 CHECK_CHARSET_GET_ID (elt, this_id);
1111 offset = 0;
4ed46869 1112 }
3263d5a2
KH
1113 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1114
1115 this_charset = CHARSET_FROM_ID (this_id);
1116 if (charset.min_char > this_charset->min_char)
1117 charset.min_char = this_charset->min_char;
1118 if (charset.max_char < this_charset->max_char)
1119 charset.max_char = this_charset->max_char;
1120 for (i = 0; i < 190; i++)
1121 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1122 }
0282eb69 1123 }
2e344af3 1124 else
3263d5a2 1125 error ("None of :code-offset, :map, :parents are specified");
05505664 1126
3263d5a2
KH
1127 val = args[charset_arg_unify_map];
1128 if (! NILP (val) && !STRINGP (val))
1129 CHECK_VECTOR (val);
1130 ASET (attrs, charset_unify_map, val);
05505664 1131
3263d5a2
KH
1132 CHECK_LIST (args[charset_arg_plist]);
1133 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1134
3263d5a2
KH
1135 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1136 &hash_code);
1137 if (charset.hash_index >= 0)
1138 {
1139 new_definition_p = 0;
4f65af01 1140 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
e83064be 1141 set_hash_value_slot (hash_table, charset.hash_index, attrs);
3263d5a2 1142 }
1a45ff10 1143 else
3263d5a2
KH
1144 {
1145 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1146 hash_code);
1147 if (charset_table_used == charset_table_size)
1148 {
e097a6fa 1149 /* Ensure that charset IDs fit into 'int' as well as into the
0065d054
PE
1150 restriction imposed by fixnums. Although the 'int' restriction
1151 could be removed, too much other code would need altering; for
1152 example, the IDs are stuffed into struct
1153 coding_system.charbuf[i] entries, which are 'int'. */
1154 int old_size = charset_table_size;
2dd2e622 1155 ptrdiff_t new_size = old_size;
0065d054 1156 struct charset *new_table =
2dd2e622 1157 xpalloc (0, &new_size, 1,
0065d054
PE
1158 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1159 sizeof *charset_table);
1160 memcpy (new_table, charset_table, old_size * sizeof *new_table);
2fe1edd1 1161 charset_table = new_table;
2dd2e622 1162 charset_table_size = new_size;
f701dc2a
PE
1163 /* FIXME: This leaks memory, as the old charset_table becomes
1164 unreachable. If the old charset table is charset_table_init
1165 then this leak is intentional; otherwise, it's unclear.
1166 If the latter memory leak is intentional, a
0065d054
PE
1167 comment should be added to explain this. If not, the old
1168 charset_table should be freed, by passing it as the 1st argument
1169 to xpalloc and removing the memcpy. */
3263d5a2
KH
1170 }
1171 id = charset_table_used++;
3263d5a2
KH
1172 new_definition_p = 1;
1173 }
2e344af3 1174
4f65af01 1175 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1176 charset.id = id;
1177 charset_table[id] = charset;
2e344af3 1178
ecca2aad 1179 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1180 {
1181 load_charset (&charset, 0);
1182 charset_table[id] = charset;
1183 }
ecca2aad 1184
3263d5a2 1185 if (charset.iso_final >= 0)
4ed46869 1186 {
3263d5a2
KH
1187 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1188 charset.iso_final) = id;
1189 if (new_definition_p)
1190 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
6c6f1994 1191 list1 (make_number (id)));
7c7dceee
KH
1192 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1193 charset_jisx0201_roman = id;
1194 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1195 charset_jisx0208_1978 = id;
1196 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1197 charset_jisx0208 = id;
d32320c4
KH
1198 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1199 charset_ksc5601 = id;
4ed46869 1200 }
d0cf2d48 1201
3263d5a2 1202 if (charset.emacs_mule_id >= 0)
4ed46869 1203 {
b84ae584 1204 emacs_mule_charset[charset.emacs_mule_id] = id;
4f65af01
KH
1205 if (charset.emacs_mule_id < 0xA0)
1206 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1207 else
1208 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1209 if (new_definition_p)
1210 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
6c6f1994 1211 list1 (make_number (id)));
4ed46869
KH
1212 }
1213
3263d5a2
KH
1214 if (new_definition_p)
1215 {
1216 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1217 if (charset.supplementary_p)
1218 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
6c6f1994 1219 list1 (make_number (id)));
8055c66a 1220 else
880820fe
KH
1221 {
1222 Lisp_Object tail;
1223
1224 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1225 {
1226 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1227
1228 if (cs->supplementary_p)
1229 break;
1230 }
1231 if (EQ (tail, Vcharset_ordered_list))
1232 Vcharset_ordered_list = Fcons (make_number (id),
1233 Vcharset_ordered_list);
1234 else if (NILP (tail))
1235 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
6c6f1994 1236 list1 (make_number (id)));
880820fe
KH
1237 else
1238 {
1239 val = Fcons (XCAR (tail), XCDR (tail));
1240 XSETCDR (tail, val);
1241 XSETCAR (tail, make_number (id));
1242 }
1243 }
dbbb237d 1244 charset_ordered_list_tick++;
3263d5a2 1245 }
4ed46869 1246
3263d5a2 1247 return Qnil;
4ed46869
KH
1248}
1249
2fe1edd1
KH
1250
1251/* Same as Fdefine_charset_internal but arguments are more convenient
1252 to call from C (typically in syms_of_charset). This can define a
1253 charset of `offset' method only. Return the ID of the new
1254 charset. */
1255
1256static int
dd4c5104
DN
1257define_charset_internal (Lisp_Object name,
1258 int dimension,
dfb6afda 1259 const char *code_space_chars,
dd4c5104
DN
1260 unsigned min_code, unsigned max_code,
1261 int iso_final, int iso_revision, int emacs_mule_id,
d5172d4f 1262 bool ascii_compatible, bool supplementary,
dd4c5104 1263 int code_offset)
2fe1edd1 1264{
dfb6afda 1265 const unsigned char *code_space = (const unsigned char *) code_space_chars;
2fe1edd1 1266 Lisp_Object args[charset_arg_max];
2fe1edd1
KH
1267 Lisp_Object val;
1268 int i;
1269
1270 args[charset_arg_name] = name;
1271 args[charset_arg_dimension] = make_number (dimension);
25721f5b 1272 val = make_uninit_vector (8);
2fe1edd1
KH
1273 for (i = 0; i < 8; i++)
1274 ASET (val, i, make_number (code_space[i]));
1275 args[charset_arg_code_space] = val;
1276 args[charset_arg_min_code] = make_number (min_code);
1277 args[charset_arg_max_code] = make_number (max_code);
1278 args[charset_arg_iso_final]
1279 = (iso_final < 0 ? Qnil : make_number (iso_final));
1280 args[charset_arg_iso_revision] = make_number (iso_revision);
1281 args[charset_arg_emacs_mule_id]
1282 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1283 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1284 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1285 args[charset_arg_invalid_code] = Qnil;
1286 args[charset_arg_code_offset] = make_number (code_offset);
1287 args[charset_arg_map] = Qnil;
1288 args[charset_arg_subset] = Qnil;
1289 args[charset_arg_superset] = Qnil;
1290 args[charset_arg_unify_map] = Qnil;
1291
694b6c97 1292 args[charset_arg_plist] =
3438fe21 1293 listn (CONSTYPE_HEAP, 14,
694b6c97
DA
1294 intern_c_string (":name"),
1295 args[charset_arg_name],
1296 intern_c_string (":dimension"),
1297 args[charset_arg_dimension],
1298 intern_c_string (":code-space"),
1299 args[charset_arg_code_space],
1300 intern_c_string (":iso-final-char"),
1301 args[charset_arg_iso_final],
1302 intern_c_string (":emacs-mule-id"),
1303 args[charset_arg_emacs_mule_id],
1304 intern_c_string (":ascii-compatible-p"),
1305 args[charset_arg_ascii_compatible_p],
1306 intern_c_string (":code-offset"),
1307 args[charset_arg_code_offset]);
2fe1edd1
KH
1308 Fdefine_charset_internal (charset_arg_max, args);
1309
1310 return XINT (CHARSET_SYMBOL_ID (name));
1311}
1312
1313
3263d5a2
KH
1314DEFUN ("define-charset-alias", Fdefine_charset_alias,
1315 Sdefine_charset_alias, 2, 2, 0,
1316 doc: /* Define ALIAS as an alias for charset CHARSET. */)
5842a27b 1317 (Lisp_Object alias, Lisp_Object charset)
4ed46869 1318{
3263d5a2
KH
1319 Lisp_Object attr;
1320
1321 CHECK_CHARSET_GET_ATTR (charset, attr);
1322 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1323 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1324 return Qnil;
1325}
4ed46869 1326
4ed46869 1327
3263d5a2 1328DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1329 doc: /* Return the property list of CHARSET. */)
5842a27b 1330 (Lisp_Object charset)
3263d5a2
KH
1331{
1332 Lisp_Object attrs;
1333
1334 CHECK_CHARSET_GET_ATTR (charset, attrs);
1335 return CHARSET_ATTR_PLIST (attrs);
1336}
1337
1338
1339DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1340 doc: /* Set CHARSET's property list to PLIST. */)
5842a27b 1341 (Lisp_Object charset, Lisp_Object plist)
3263d5a2
KH
1342{
1343 Lisp_Object attrs;
1344
1345 CHECK_CHARSET_GET_ATTR (charset, attrs);
4939150c 1346 ASET (attrs, charset_plist, plist);
3263d5a2
KH
1347 return plist;
1348}
1349
1350
dbbb237d 1351DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1352 doc: /* Unify characters of CHARSET with Unicode.
1353This means reading the relevant file and installing the table defined
dbbb237d
KH
1354by CHARSET's `:unify-map' property.
1355
64165ae2
DL
1356Optional second arg UNIFY-MAP is a file name string or a vector. It has
1357the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1358`define-charset' (which see).
1359
1360Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
5842a27b 1361 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
8a73a704 1362{
3263d5a2
KH
1363 int id;
1364 struct charset *cs;
8f924df7 1365
3263d5a2
KH
1366 CHECK_CHARSET_GET_ID (charset, id);
1367 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1368 if (NILP (deunify)
1369 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1370 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1371 return Qnil;
dbbb237d 1372
3263d5a2 1373 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1374 if (NILP (deunify))
1375 {
ecca2aad
KH
1376 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1377 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1378 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1379 if (NILP (unify_map))
1380 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1381 else
ecca2aad
KH
1382 {
1383 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1384 signal_error ("Bad unify-map", unify_map);
4939150c 1385 set_charset_attr (cs, charset_unify_map, unify_map);
ecca2aad
KH
1386 }
1387 if (NILP (Vchar_unify_table))
1388 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1389 char_table_set_range (Vchar_unify_table,
1390 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1391 CHARSET_UNIFIED_P (cs) = 1;
1392 }
1393 else if (CHAR_TABLE_P (Vchar_unify_table))
1394 {
3c7649c1
PE
1395 unsigned min_code = CHARSET_MIN_CODE (cs);
1396 unsigned max_code = CHARSET_MAX_CODE (cs);
dbbb237d
KH
1397 int min_char = DECODE_CHAR (cs, min_code);
1398 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1399
dbbb237d
KH
1400 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1401 }
8f924df7 1402
3263d5a2 1403 return Qnil;
8a73a704
KH
1404}
1405
3fac5a51
KH
1406DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1407 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1408 doc: /*
d0cf2d48 1409Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1410DIMENSION is the number of bytes to represent a character: 1 or 2.
1411CHARS is the number of characters in a dimension: 94 or 96.
1412
1413This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1414If there's no unused final char for the specified kind of charset,
fdb82f93 1415return nil. */)
5842a27b 1416 (Lisp_Object dimension, Lisp_Object chars)
3fac5a51
KH
1417{
1418 int final_char;
1419
b7826503
PJ
1420 CHECK_NUMBER (dimension);
1421 CHECK_NUMBER (chars);
3263d5a2
KH
1422 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1423 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1424 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1425 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1426 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1427 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1428 break;
3fac5a51
KH
1429 return (final_char <= '?' ? make_number (final_char) : Qnil);
1430}
1431
3263d5a2 1432static void
971de7fb 1433check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1434{
3263d5a2
KH
1435 CHECK_NATNUM (dimension);
1436 CHECK_NATNUM (chars);
e6c3da20 1437 CHECK_CHARACTER (final_char);
4ed46869 1438
3263d5a2 1439 if (XINT (dimension) > 3)
c2982e87 1440 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
5fdb398c 1441 XINT (dimension));
4ed46869 1442 if (XINT (chars) != 94 && XINT (chars) != 96)
c2982e87 1443 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
3263d5a2 1444 if (XINT (final_char) < '0' || XINT (final_char) > '~')
e6c3da20
EZ
1445 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'",
1446 (int)XINT (final_char));
3263d5a2
KH
1447}
1448
1449
1450DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1451 4, 4, 0,
cefd8c4f
KH
1452 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1453
1454On decoding by an ISO-2022 base coding system, when a charset
1455specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1456if CHARSET is designated instead. */)
5842a27b 1457 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
3263d5a2
KH
1458{
1459 int id;
d5172d4f 1460 bool chars_flag;
4ed46869 1461
3263d5a2
KH
1462 CHECK_CHARSET_GET_ID (charset, id);
1463 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1464 chars_flag = XINT (chars) == 96;
1465 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1466 return Qnil;
1467}
1468
3263d5a2 1469
2e344af3
KH
1470/* Return information about charsets in the text at PTR of NBYTES
1471 bytes, which are NCHARS characters. The value is:
f6302ac9 1472
cfe34140 1473 0: Each character is represented by one byte. This is always
3263d5a2
KH
1474 true for a unibyte string. For a multibyte string, true if
1475 it contains only ASCII characters.
1476
28c026cd
DL
1477 1: No charsets other than ascii, control-1, and latin-1 are
1478 found.
1d67c29b 1479
3263d5a2
KH
1480 2: Otherwise.
1481*/
4ed46869
KH
1482
1483int
971de7fb 1484string_xstring_p (Lisp_Object string)
4ed46869 1485{
8f924df7
KH
1486 const unsigned char *p = SDATA (string);
1487 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1488
8f924df7 1489 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1490 return 0;
1491
3263d5a2 1492 while (p < endp)
0282eb69 1493 {
3263d5a2 1494 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1495
3cc67a4d 1496 if (c >= 0x100)
3263d5a2 1497 return 2;
0282eb69 1498 }
3263d5a2
KH
1499 return 1;
1500}
05505664 1501
05505664 1502
3263d5a2 1503/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1504
3cc67a4d
KH
1505 CHARSETS is a vector. If Nth element is non-nil, it means the
1506 charset whose id is N is already found.
2e344af3 1507
3263d5a2 1508 It may lookup a translation table TABLE if supplied. */
2e344af3 1509
3263d5a2 1510static void
d5172d4f
PE
1511find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1512 ptrdiff_t nbytes, Lisp_Object charsets,
1513 Lisp_Object table, bool multibyte)
3263d5a2 1514{
dbbb237d 1515 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1516
1517 if (nchars == nbytes)
3263d5a2 1518 {
3cc67a4d
KH
1519 if (multibyte)
1520 ASET (charsets, charset_ascii, Qt);
1521 else
1522 while (ptr < pend)
1523 {
1524 int c = *ptr++;
1525
1526 if (!NILP (table))
1527 c = translate_char (table, c);
200fc949 1528 if (ASCII_CHAR_P (c))
3cc67a4d
KH
1529 ASET (charsets, charset_ascii, Qt);
1530 else
1531 ASET (charsets, charset_eight_bit, Qt);
1532 }
1533 }
1534 else
1535 {
1536 while (ptr < pend)
3263d5a2 1537 {
3cc67a4d
KH
1538 int c = STRING_CHAR_ADVANCE (ptr);
1539 struct charset *charset;
3263d5a2 1540
3cc67a4d
KH
1541 if (!NILP (table))
1542 c = translate_char (table, c);
1543 charset = CHAR_CHARSET (c);
1544 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1545 }
4ed46869 1546 }
4ed46869
KH
1547}
1548
1549DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1550 2, 3, 0,
fdb82f93
PJ
1551 doc: /* Return a list of charsets in the region between BEG and END.
1552BEG and END are buffer positions.
1553Optional arg TABLE if non-nil is a translation table to look up.
1554
fdb82f93
PJ
1555If the current buffer is unibyte, the returned list may contain
1556only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1557 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
4ed46869 1558{
3263d5a2 1559 Lisp_Object charsets;
d311d28c 1560 ptrdiff_t from, from_byte, to, stop, stop_byte;
42ca828e 1561 int i;
4ed46869 1562 Lisp_Object val;
d5172d4f 1563 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
4ed46869
KH
1564
1565 validate_region (&beg, &end);
1566 from = XFASTINT (beg);
1567 stop = to = XFASTINT (end);
6ae1f27e 1568
4ed46869 1569 if (from < GPT && GPT < to)
6ae1f27e
RS
1570 {
1571 stop = GPT;
1572 stop_byte = GPT_BYTE;
1573 }
1574 else
1575 stop_byte = CHAR_TO_BYTE (stop);
1576
1577 from_byte = CHAR_TO_BYTE (from);
1578
3263d5a2 1579 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1580 while (1)
1581 {
3263d5a2 1582 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1583 stop_byte - from_byte, charsets, table,
1584 multibyte);
4ed46869 1585 if (stop < to)
6ae1f27e
RS
1586 {
1587 from = stop, from_byte = stop_byte;
1588 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1589 }
4ed46869
KH
1590 else
1591 break;
1592 }
6ae1f27e 1593
4ed46869 1594 val = Qnil;
3263d5a2 1595 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1596 if (!NILP (AREF (charsets, i)))
3263d5a2 1597 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1598 return val;
1599}
1600
1601DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1602 1, 2, 0,
fdb82f93
PJ
1603 doc: /* Return a list of charsets in STR.
1604Optional arg TABLE if non-nil is a translation table to look up.
1605
fdb82f93 1606If STR is unibyte, the returned list may contain
3263d5a2 1607only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1608 (Lisp_Object str, Lisp_Object table)
4ed46869 1609{
3263d5a2 1610 Lisp_Object charsets;
4ed46869
KH
1611 int i;
1612 Lisp_Object val;
1613
b7826503 1614 CHECK_STRING (str);
87b089ad 1615
3263d5a2 1616 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1617 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1618 charsets, table,
1619 STRING_MULTIBYTE (str));
4ed46869 1620 val = Qnil;
3263d5a2 1621 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1622 if (!NILP (AREF (charsets, i)))
3263d5a2 1623 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1624 return val;
1625}
2e344af3 1626
4ed46869 1627\f
3263d5a2 1628
ecca2aad
KH
1629/* Return a unified character code for C (>= 0x110000). VAL is a
1630 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1631 charset symbol. */
41c8bfcf 1632static int
971de7fb 1633maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1634{
1635 struct charset *charset;
1636
1637 if (INTEGERP (val))
ccd9a01a 1638 return XFASTINT (val);
ecca2aad
KH
1639 if (NILP (val))
1640 return c;
1641
1642 CHECK_CHARSET_GET_CHARSET (val, charset);
291d430f 1643#ifdef REL_ALLOC
efc00ab1 1644 /* The call to load_charset below can allocate memory, which screws
291d430f
EZ
1645 callers of this function through STRING_CHAR_* macros that hold C
1646 pointers to buffer text, if REL_ALLOC is used. */
1647 r_alloc_inhibit_buffer_relocation (1);
1648#endif
ecca2aad
KH
1649 load_charset (charset, 1);
1650 if (! inhibit_load_charset_map)
1651 {
1652 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1653 if (! NILP (val))
ccd9a01a 1654 c = XFASTINT (val);
ecca2aad
KH
1655 }
1656 else
1657 {
1658 int code_index = c - CHARSET_CODE_OFFSET (charset);
1659 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1660
1661 if (unified > 0)
1662 c = unified;
1663 }
291d430f
EZ
1664#ifdef REL_ALLOC
1665 r_alloc_inhibit_buffer_relocation (0);
1666#endif
ecca2aad
KH
1667 return c;
1668}
1669
1670
bbd240ce 1671/* Return a character corresponding to the code-point CODE of
3263d5a2
KH
1672 CHARSET. */
1673
1674int
971de7fb 1675decode_char (struct charset *charset, unsigned int code)
4ed46869 1676{
3263d5a2
KH
1677 int c, char_index;
1678 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1679
3263d5a2
KH
1680 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1681 return -1;
4ed46869 1682
374c5cfd 1683 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1684 {
374c5cfd
KH
1685 Lisp_Object subset_info;
1686
1687 subset_info = CHARSET_SUBSET (charset);
1688 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1689 code -= XINT (AREF (subset_info, 3));
1690 if (code < XFASTINT (AREF (subset_info, 1))
1691 || code > XFASTINT (AREF (subset_info, 2)))
1692 c = -1;
1693 else
1694 c = DECODE_CHAR (charset, code);
2e344af3 1695 }
374c5cfd 1696 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1697 {
3263d5a2 1698 Lisp_Object parents;
4ed46869 1699
374c5cfd 1700 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1701 c = -1;
1702 for (; CONSP (parents); parents = XCDR (parents))
1703 {
1704 int id = XINT (XCAR (XCAR (parents)));
1705 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1706 unsigned this_code = code - code_offset;
4ed46869 1707
3263d5a2
KH
1708 charset = CHARSET_FROM_ID (id);
1709 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1710 break;
1711 }
1712 }
1713 else
ac4137cc 1714 {
3263d5a2 1715 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1716 if (char_index < 0)
1717 return -1;
4ed46869 1718
3263d5a2 1719 if (method == CHARSET_METHOD_MAP)
ac4137cc 1720 {
3263d5a2 1721 Lisp_Object decoder;
4ed46869 1722
3263d5a2
KH
1723 decoder = CHARSET_DECODER (charset);
1724 if (! VECTORP (decoder))
ecca2aad
KH
1725 {
1726 load_charset (charset, 1);
1727 decoder = CHARSET_DECODER (charset);
1728 }
1729 if (VECTORP (decoder))
1730 c = XINT (AREF (decoder, char_index));
1731 else
1732 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1733 }
ecca2aad 1734 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1735 {
3263d5a2 1736 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad 1737 if (CHARSET_UNIFIED_P (charset)
41c8bfcf
PE
1738 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1739 {
1740 /* Unify C with a Unicode character if possible. */
1741 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1742 c = maybe_unify_char (c, val);
1743 }
ac4137cc
KH
1744 }
1745 }
4ed46869 1746
3263d5a2 1747 return c;
90d7b74e
KH
1748}
1749
374c5cfd
KH
1750/* Variable used temporarily by the macro ENCODE_CHAR. */
1751Lisp_Object charset_work;
4ed46869 1752
d5172d4f 1753/* Return a code-point of C in CHARSET. If C doesn't belong to
28c026cd
DL
1754 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1755 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1756
3263d5a2 1757unsigned
971de7fb 1758encode_char (struct charset *charset, int c)
9d3d8cba 1759{
3263d5a2
KH
1760 unsigned code;
1761 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1762
3263d5a2 1763 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1764 {
6809ca75 1765 Lisp_Object deunifier;
ecca2aad 1766 int code_index = -1;
4ed46869 1767
3263d5a2
KH
1768 deunifier = CHARSET_DEUNIFIER (charset);
1769 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1770 {
ecca2aad 1771 load_charset (charset, 2);
3263d5a2 1772 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1773 }
ecca2aad
KH
1774 if (CHAR_TABLE_P (deunifier))
1775 {
1776 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1777
1778 if (INTEGERP (deunified))
1779 code_index = XINT (deunified);
1780 }
1781 else
1782 {
1783 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1784 }
1785 if (code_index >= 0)
1786 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1787 }
9d3d8cba 1788
374c5cfd
KH
1789 if (method == CHARSET_METHOD_SUBSET)
1790 {
1791 Lisp_Object subset_info;
1792 struct charset *this_charset;
1793
1794 subset_info = CHARSET_SUBSET (charset);
1795 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1796 code = ENCODE_CHAR (this_charset, c);
1797 if (code == CHARSET_INVALID_CODE (this_charset)
1798 || code < XFASTINT (AREF (subset_info, 1))
1799 || code > XFASTINT (AREF (subset_info, 2)))
1800 return CHARSET_INVALID_CODE (charset);
1801 code += XINT (AREF (subset_info, 3));
1802 return code;
1803 }
9d3d8cba 1804
374c5cfd 1805 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1806 {
3263d5a2 1807 Lisp_Object parents;
d2665018 1808
374c5cfd 1809 parents = CHARSET_SUPERSET (charset);
3263d5a2 1810 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1811 {
3263d5a2
KH
1812 int id = XINT (XCAR (XCAR (parents)));
1813 int code_offset = XINT (XCDR (XCAR (parents)));
1814 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1815
3263d5a2 1816 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1817 if (code != CHARSET_INVALID_CODE (this_charset))
1818 return code + code_offset;
beeedaad 1819 }
3263d5a2
KH
1820 return CHARSET_INVALID_CODE (charset);
1821 }
1bcc1567 1822
15c85a88
KH
1823 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1824 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1825 return CHARSET_INVALID_CODE (charset);
1bcc1567 1826
3263d5a2 1827 if (method == CHARSET_METHOD_MAP)
3f62427c 1828 {
3263d5a2 1829 Lisp_Object encoder;
beeedaad 1830 Lisp_Object val;
9b6a601f 1831
3263d5a2
KH
1832 encoder = CHARSET_ENCODER (charset);
1833 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1834 {
1835 load_charset (charset, 2);
1836 encoder = CHARSET_ENCODER (charset);
1837 }
1838 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1839 {
1840 val = CHAR_TABLE_REF (encoder, c);
1841 if (NILP (val))
1842 return CHARSET_INVALID_CODE (charset);
1843 code = XINT (val);
1844 if (! CHARSET_COMPACT_CODES_P (charset))
1845 code = INDEX_TO_CODE_POINT (charset, code);
1846 }
1847 else
1848 {
1849 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
51b59d79 1850 code = INDEX_TO_CODE_POINT (charset, code);
ecca2aad 1851 }
3263d5a2 1852 }
820ee249 1853 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1854 {
58c8a77d
PE
1855 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1856
1857 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1858 }
8ac5a9cc 1859
3263d5a2 1860 return code;
4ed46869
KH
1861}
1862
4ed46869 1863
3263d5a2
KH
1864DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1865 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1866Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1867
328a8179 1868CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
5842a27b 1869 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
4ed46869 1870{
3263d5a2
KH
1871 int c, id;
1872 unsigned code;
1873 struct charset *charsetp;
859f2b3c 1874
3263d5a2 1875 CHECK_CHARSET_GET_ID (charset, id);
be44ca6c 1876 code = cons_to_unsigned (code_point, UINT_MAX);
3263d5a2
KH
1877 charsetp = CHARSET_FROM_ID (id);
1878 c = DECODE_CHAR (charsetp, code);
1879 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1880}
1881
859f2b3c 1882
3263d5a2
KH
1883DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1884 doc: /* Encode the character CH into a code-point of CHARSET.
328a8179 1885Return nil if CHARSET doesn't include CH. */)
5842a27b 1886 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
4ed46869 1887{
34206dd2 1888 int c, id;
3263d5a2
KH
1889 unsigned code;
1890 struct charset *charsetp;
046b1f03 1891
3263d5a2 1892 CHECK_CHARSET_GET_ID (charset, id);
34206dd2
PE
1893 CHECK_CHARACTER (ch);
1894 c = XFASTINT (ch);
3263d5a2 1895 charsetp = CHARSET_FROM_ID (id);
34206dd2 1896 code = ENCODE_CHAR (charsetp, c);
3263d5a2
KH
1897 if (code == CHARSET_INVALID_CODE (charsetp))
1898 return Qnil;
be44ca6c 1899 return INTEGER_TO_CONS (code);
beeedaad
KH
1900}
1901
beeedaad 1902
b121a744
KH
1903DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1904 doc:
1905 /* Return a character of CHARSET whose position codes are CODEn.
1906
1907CODE1 through CODE4 are optional, but if you don't supply sufficient
1908position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1909is specified. */)
5842a27b 1910 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
beeedaad 1911{
3263d5a2
KH
1912 int id, dimension;
1913 struct charset *charsetp;
b121a744
KH
1914 unsigned code;
1915 int c;
87b089ad 1916
3263d5a2
KH
1917 CHECK_CHARSET_GET_ID (charset, id);
1918 charsetp = CHARSET_FROM_ID (id);
4ed46869 1919
b121a744
KH
1920 dimension = CHARSET_DIMENSION (charsetp);
1921 if (NILP (code1))
d47073ca
KH
1922 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1923 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1924 else
859f2b3c 1925 {
b121a744
KH
1926 CHECK_NATNUM (code1);
1927 if (XFASTINT (code1) >= 0x100)
1928 args_out_of_range (make_number (0xFF), code1);
1929 code = XFASTINT (code1);
859f2b3c 1930
b0a1e45e 1931 if (dimension > 1)
beeedaad 1932 {
b121a744 1933 code <<= 8;
b0a1e45e
KH
1934 if (NILP (code2))
1935 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1936 else
b121a744 1937 {
b0a1e45e
KH
1938 CHECK_NATNUM (code2);
1939 if (XFASTINT (code2) >= 0x100)
1940 args_out_of_range (make_number (0xFF), code2);
1941 code |= XFASTINT (code2);
b121a744 1942 }
99529c2c 1943
b0a1e45e 1944 if (dimension > 2)
b121a744
KH
1945 {
1946 code <<= 8;
b0a1e45e
KH
1947 if (NILP (code3))
1948 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1949 else
1950 {
b0a1e45e
KH
1951 CHECK_NATNUM (code3);
1952 if (XFASTINT (code3) >= 0x100)
1953 args_out_of_range (make_number (0xFF), code3);
1954 code |= XFASTINT (code3);
1955 }
1956
1957 if (dimension > 3)
1958 {
1959 code <<= 8;
1960 if (NILP (code4))
1961 code |= charsetp->code_space[0];
1962 else
1963 {
1964 CHECK_NATNUM (code4);
1965 if (XFASTINT (code4) >= 0x100)
1966 args_out_of_range (make_number (0xFF), code4);
1967 code |= XFASTINT (code4);
1968 }
b121a744
KH
1969 }
1970 }
beeedaad 1971 }
859f2b3c 1972 }
beeedaad 1973
b121a744
KH
1974 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1975 code &= 0x7F7F7F7F;
1976 c = DECODE_CHAR (charsetp, code);
1977 if (c < 0)
1978 error ("Invalid code(s)");
3263d5a2 1979 return make_number (c);
4ed46869
KH
1980}
1981
beeedaad 1982
3263d5a2
KH
1983/* Return the first charset in CHARSET_LIST that contains C.
1984 CHARSET_LIST is a list of charset IDs. If it is nil, use
1985 Vcharset_ordered_list. */
beeedaad 1986
3263d5a2 1987struct charset *
971de7fb 1988char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 1989{
d5172d4f 1990 bool maybe_null = 0;
8a1816bb 1991
3263d5a2
KH
1992 if (NILP (charset_list))
1993 charset_list = Vcharset_ordered_list;
8a1816bb
KH
1994 else
1995 maybe_null = 1;
beeedaad 1996
6c652beb 1997 while (CONSP (charset_list))
2e344af3 1998 {
3263d5a2
KH
1999 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2000 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2001
3263d5a2 2002 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2003 {
3263d5a2
KH
2004 if (code_return)
2005 *code_return = code;
2006 return charset;
3f62427c 2007 }
3263d5a2 2008 charset_list = XCDR (charset_list);
c0be27fd
KH
2009 if (! maybe_null
2010 && c <= MAX_UNICODE_CHAR
2011 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 2012 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2013 }
8a1816bb
KH
2014 return (maybe_null ? NULL
2015 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2016 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2017}
2018
2e344af3 2019
3263d5a2 2020DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2021 doc:
d0cf2d48 2022 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2023The charset is decided by the current priority order of charsets.
2024A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2025CH in the charset. */)
5842a27b 2026 (Lisp_Object ch)
4ed46869 2027{
3263d5a2
KH
2028 struct charset *charset;
2029 int c, dimension;
2030 unsigned code;
4ed46869
KH
2031 Lisp_Object val;
2032
3263d5a2
KH
2033 CHECK_CHARACTER (ch);
2034 c = XFASTINT (ch);
2035 charset = CHAR_CHARSET (c);
2036 if (! charset)
1088b922 2037 emacs_abort ();
3263d5a2
KH
2038 code = ENCODE_CHAR (charset, c);
2039 if (code == CHARSET_INVALID_CODE (charset))
1088b922 2040 emacs_abort ();
3263d5a2 2041 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2042 for (val = Qnil; dimension > 0; dimension--)
2043 {
2044 val = Fcons (make_number (code & 0xFF), val);
2045 code >>= 8;
2046 }
3263d5a2 2047 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2048}
2049
740f080d 2050
4cb75c4b
KH
2051DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2052 doc: /* Return the charset of highest priority that contains CH.
15c6d837
XF
2053ASCII characters are an exception: for them, this function always
2054returns `ascii'.
4cb75c4b
KH
2055If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2056from which to find the charset. It may also be a coding system. In
2057that case, find the charset from what supported by that coding system. */)
5842a27b 2058 (Lisp_Object ch, Lisp_Object restriction)
4ed46869 2059{
3263d5a2 2060 struct charset *charset;
4ed46869 2061
3263d5a2 2062 CHECK_CHARACTER (ch);
4cb75c4b
KH
2063 if (NILP (restriction))
2064 charset = CHAR_CHARSET (XINT (ch));
2065 else
2066 {
4cb75c4b
KH
2067 if (CONSP (restriction))
2068 {
7a84eee5
KH
2069 int c = XFASTINT (ch);
2070
2071 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2072 {
f6095868 2073 struct charset *rcharset;
4cb75c4b 2074
f6095868
PE
2075 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2076 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
7a84eee5 2077 return XCAR (restriction);
4cb75c4b 2078 }
7a84eee5 2079 return Qnil;
4cb75c4b 2080 }
7a84eee5
KH
2081 restriction = coding_system_charset_list (restriction);
2082 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2083 if (! charset)
2084 return Qnil;
2085 }
3263d5a2 2086 return (CHARSET_NAME (charset));
4ed46869
KH
2087}
2088
17e7ef1b 2089
3263d5a2
KH
2090DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2091 doc: /*
2092Return charset of a character in the current buffer at position POS.
cd1181db 2093If POS is nil, it defaults to the current point.
3263d5a2 2094If POS is out of range, the value is nil. */)
5842a27b 2095 (Lisp_Object pos)
2e344af3 2096{
3263d5a2
KH
2097 Lisp_Object ch;
2098 struct charset *charset;
046b1f03 2099
3263d5a2
KH
2100 ch = Fchar_after (pos);
2101 if (! INTEGERP (ch))
2102 return ch;
2103 charset = CHAR_CHARSET (XINT (ch));
2104 return (CHARSET_NAME (charset));
6ae1f27e 2105}
9036eb45 2106
87b089ad 2107
3263d5a2
KH
2108DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2109 doc: /*
2110Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2111
2112ISO 2022's designation sequence (escape sequence) distinguishes charsets
2113by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2114whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2115See the documentation of the function `charset-info' for the meanings of
2116DIMENSION, CHARS, and FINAL-CHAR. */)
5842a27b 2117 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
6ae1f27e 2118{
3263d5a2 2119 int id;
d5172d4f 2120 bool chars_flag;
a8a35e61 2121
3263d5a2 2122 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2123 chars_flag = XFASTINT (chars) == 96;
2124 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2125 XFASTINT (final_char));
2126 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2127}
2128
87b089ad 2129
3263d5a2
KH
2130DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2131 0, 0, 0,
2132 doc: /*
ecca2aad
KH
2133Internal use only.
2134Clear temporary charset mapping tables.
2135It should be called only from temacs invoked for dumping. */)
5842a27b 2136 (void)
87b089ad 2137{
ecca2aad 2138 if (temp_charset_work)
87b089ad 2139 {
baad03f0 2140 xfree (temp_charset_work);
ecca2aad 2141 temp_charset_work = NULL;
2e344af3 2142 }
2e344af3 2143
ecca2aad
KH
2144 if (CHAR_TABLE_P (Vchar_unify_table))
2145 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2146
3263d5a2 2147 return Qnil;
740f080d
KH
2148}
2149
8ddf5e57
DL
2150DEFUN ("charset-priority-list", Fcharset_priority_list,
2151 Scharset_priority_list, 0, 1, 0,
2152 doc: /* Return the list of charsets ordered by priority.
2153HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 2154 (Lisp_Object highestp)
2e344af3 2155{
8ddf5e57 2156 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2157
8ddf5e57 2158 if (!NILP (highestp))
16fed1fc 2159 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2160
8ddf5e57 2161 while (!NILP (list))
2e344af3 2162 {
16fed1fc 2163 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2164 list = XCDR (list);
2e344af3 2165 }
8ddf5e57 2166 return Fnreverse (val);
2e344af3
KH
2167}
2168
8ddf5e57
DL
2169DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2170 1, MANY, 0,
2171 doc: /* Assign higher priority to the charsets given as arguments.
2172usage: (set-charset-priority &rest charsets) */)
f66c7cf8 2173 (ptrdiff_t nargs, Lisp_Object *args)
4ed46869 2174{
af7c60ca 2175 Lisp_Object new_head, old_list, arglist[2];
321c819c 2176 Lisp_Object list_2022, list_emacs_mule;
f66c7cf8 2177 ptrdiff_t i;
c5101a77 2178 int id;
4ed46869 2179
8ddf5e57 2180 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2181 new_head = Qnil;
8ddf5e57 2182 for (i = 0; i < nargs; i++)
4ed46869 2183 {
8ddf5e57 2184 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2185 if (! NILP (Fmemq (make_number (id), old_list)))
2186 {
2187 old_list = Fdelq (make_number (id), old_list);
2188 new_head = Fcons (make_number (id), new_head);
2189 }
5729c92f 2190 }
8ddf5e57 2191 arglist[0] = Fnreverse (new_head);
6a9c90ec 2192 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2193 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2194 charset_ordered_list_tick++;
5729c92f 2195
6809ca75 2196 charset_unibyte = -1;
321c819c 2197 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2198 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2199 {
e77415b0 2200 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2201 list_2022 = Fcons (XCAR (old_list), list_2022);
2202 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2203 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2204 if (charset_unibyte < 0)
2205 {
2206 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2207
2208 if (CHARSET_DIMENSION (charset) == 1
2209 && CHARSET_ASCII_COMPATIBLE_P (charset)
2210 && CHARSET_MAX_CHAR (charset) >= 0x80)
2211 charset_unibyte = CHARSET_ID (charset);
2212 }
4ed46869 2213 }
321c819c
KH
2214 Viso_2022_charset_list = Fnreverse (list_2022);
2215 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2216 if (charset_unibyte < 0)
2217 charset_unibyte = charset_iso_8859_1;
4ed46869 2218
8ddf5e57 2219 return Qnil;
4ed46869
KH
2220}
2221
d5b33309
KH
2222DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2223 0, 1, 0,
2224 doc: /* Internal use only.
2225Return charset identification number of CHARSET. */)
5842a27b 2226 (Lisp_Object charset)
4ed46869 2227{
d5b33309 2228 int id;
4ed46869 2229
d5b33309
KH
2230 CHECK_CHARSET_GET_ID (charset, id);
2231 return make_number (id);
4ed46869
KH
2232}
2233
7a84eee5
KH
2234struct charset_sort_data
2235{
2236 Lisp_Object charset;
2237 int id;
0065d054 2238 ptrdiff_t priority;
7a84eee5
KH
2239};
2240
2241static int
2242charset_compare (const void *d1, const void *d2)
2243{
2244 const struct charset_sort_data *data1 = d1, *data2 = d2;
0065d054
PE
2245 if (data1->priority != data2->priority)
2246 return data1->priority < data2->priority ? -1 : 1;
2247 return 0;
7a84eee5
KH
2248}
2249
2250DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2251 doc: /* Sort charset list CHARSETS by a priority of each charset.
2252Return the sorted list. CHARSETS is modified by side effects.
2253See also `charset-priority-list' and `set-charset-priority'. */)
2254 (Lisp_Object charsets)
2255{
2256 Lisp_Object len = Flength (charsets);
0065d054
PE
2257 ptrdiff_t n = XFASTINT (len), i, j;
2258 int done;
7a84eee5
KH
2259 Lisp_Object tail, elt, attrs;
2260 struct charset_sort_data *sort_data;
726929c4 2261 int id, min_id = INT_MAX, max_id = INT_MIN;
7a84eee5
KH
2262 USE_SAFE_ALLOCA;
2263
2264 if (n == 0)
2265 return Qnil;
0065d054 2266 SAFE_NALLOCA (sort_data, 1, n);
7a84eee5
KH
2267 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2268 {
2269 elt = XCAR (tail);
2270 CHECK_CHARSET_GET_ATTR (elt, attrs);
2271 sort_data[i].charset = elt;
2272 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
726929c4 2273 if (id < min_id)
7a84eee5 2274 min_id = id;
726929c4 2275 if (id > max_id)
7a84eee5
KH
2276 max_id = id;
2277 }
2278 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2279 done < n && CONSP (tail); tail = XCDR (tail), i++)
2280 {
2281 elt = XCAR (tail);
2282 id = XFASTINT (elt);
2283 if (id >= min_id && id <= max_id)
2284 for (j = 0; j < n; j++)
2285 if (sort_data[j].id == id)
2286 {
2287 sort_data[j].priority = i;
2288 done++;
2289 }
2290 }
2291 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2292 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2293 XSETCAR (tail, sort_data[i].charset);
2294 SAFE_FREE ();
2295 return charsets;
2296}
2297
4ed46869 2298\f
3263d5a2 2299void
971de7fb 2300init_charset (void)
4ed46869 2301{
c8f94403
GM
2302 Lisp_Object tempdir;
2303 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
73dcdb9f 2304 if (! file_accessible_directory_p (SSDATA (tempdir)))
c8f94403 2305 {
63541ed8
GM
2306 /* This used to be non-fatal (dir_warning), but it should not
2307 happen, and if it does sooner or later it will cause some
16905bec 2308 obscure problem (eg bug#6401), so better abort. */
63541ed8
GM
2309 fprintf (stderr, "Error: charsets directory not found:\n\
2310%s\n\
7b9cb544 2311Emacs will not function correctly without the character map files.\n%s\
c8f94403 2312Please check your installation!\n",
7b9cb544
GM
2313 SDATA (tempdir),
2314 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2315variable is set, maybe it has the wrong value?\n" : "");
63541ed8 2316 exit (1);
c8f94403
GM
2317 }
2318
6c6f1994 2319 Vcharset_map_path = list1 (tempdir);
4ed46869
KH
2320}
2321
4ed46869 2322
dfcf069d 2323void
971de7fb 2324init_charset_once (void)
4ed46869
KH
2325{
2326 int i, j, k;
2327
3263d5a2
KH
2328 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2329 for (j = 0; j < ISO_MAX_CHARS; j++)
2330 for (k = 0; k < ISO_MAX_FINAL; k++)
2331 iso_charset_table[i][j][k] = -1;
4ed46869 2332
60383934 2333 for (i = 0; i < 256; i++)
b84ae584 2334 emacs_mule_charset[i] = -1;
4ed46869 2335
7c7dceee
KH
2336 charset_jisx0201_roman = -1;
2337 charset_jisx0208_1978 = -1;
2338 charset_jisx0208 = -1;
d32320c4 2339 charset_ksc5601 = -1;
4ed46869
KH
2340}
2341
2342#ifdef emacs
2343
f701dc2a
PE
2344/* Allocate an initial charset table that is large enough to handle
2345 Emacs while it is bootstrapping. As of September 2011, the size
2346 needs to be at least 166; make it a bit bigger to allow for future
2347 expansion.
2348
2349 Don't make the value so small that the table is reallocated during
2350 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2351 during an initial bootstrap wreak havoc after dumping; see the
2352 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2353 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2354static struct charset charset_table_init[180];
2355
dfcf069d 2356void
971de7fb 2357syms_of_charset (void)
4ed46869 2358{
fe6aa7a1
BT
2359#include "charset.x"
2360
3263d5a2
KH
2361 DEFSYM (Qcharsetp, "charsetp");
2362
2363 DEFSYM (Qascii, "ascii");
2364 DEFSYM (Qunicode, "unicode");
6c652beb 2365 DEFSYM (Qemacs, "emacs");
2fe1edd1 2366 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2367 DEFSYM (Qiso_8859_1, "iso-8859-1");
2368
2369 DEFSYM (Qgl, "gl");
2370 DEFSYM (Qgr, "gr");
2371
3263d5a2
KH
2372 staticpro (&Vcharset_ordered_list);
2373 Vcharset_ordered_list = Qnil;
2374
2375 staticpro (&Viso_2022_charset_list);
2376 Viso_2022_charset_list = Qnil;
2377
2378 staticpro (&Vemacs_mule_charset_list);
2379 Vemacs_mule_charset_list = Qnil;
2380
3943ed76 2381 /* Don't staticpro them here. It's done in syms_of_fns. */
088dcc3e
DN
2382 QCtest = intern_c_string (":test");
2383 Qeq = intern_c_string ("eq");
3943ed76 2384
3263d5a2 2385 staticpro (&Vcharset_hash_table);
8f924df7
KH
2386 {
2387 Lisp_Object args[2];
2388 args[0] = QCtest;
2389 args[1] = Qeq;
2390 Vcharset_hash_table = Fmake_hash_table (2, args);
2391 }
3263d5a2 2392
f701dc2a 2393 charset_table = charset_table_init;
faa52174 2394 charset_table_size = ARRAYELTS (charset_table_init);
3263d5a2
KH
2395 charset_table_used = 0;
2396
29208e82 2397 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
fb7ada5f 2398 doc: /* List of directories to search for charset map files. */);
4beef065 2399 Vcharset_map_path = Qnil;
4ed46869 2400
29208e82 2401 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
ecca2aad
KH
2402 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2403 inhibit_load_charset_map = 0;
2404
29208e82 2405 DEFVAR_LISP ("charset-list", Vcharset_list,
528623a0 2406 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2407 Vcharset_list = Qnil;
2408
29208e82 2409 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
6a9c90ec
KH
2410 doc: /* ISO639 language mnemonic symbol for the current language environment.
2411If the current language environment is for multiple languages (e.g. "Latin-1"),
2412the value may be a list of mnemonics. */);
2413 Vcurrent_iso639_language = Qnil;
2414
2fe1edd1 2415 charset_ascii
12455b2f 2416 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2fe1edd1 2417 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523 2418 charset_iso_8859_1
12455b2f 2419 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
14e3d523 2420 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2421 charset_unicode
12455b2f 2422 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2fe1edd1 2423 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb 2424 charset_emacs
12455b2f 2425 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
6c652beb 2426 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1 2427 charset_eight_bit
12455b2f 2428 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
880820fe 2429 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2430 MAX_5_BYTE_CHAR + 1);
6809ca75 2431 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2432}
2433
2434#endif /* emacs */