Merge changes made in Gnus trunk.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
114f9c96 3 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 5 2005, 2006, 2007, 2008, 2009, 2010
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
7a84eee5 31#include <stdlib.h>
3263d5a2
KH
32#include <unistd.h>
33#include <ctype.h>
4ed46869 34#include <sys/types.h>
d7306fe6 35#include <setjmp.h>
4ed46869 36#include "lisp.h"
3263d5a2 37#include "character.h"
4ed46869
KH
38#include "charset.h"
39#include "coding.h"
fc6b09bf 40#include "disptab.h"
3263d5a2 41#include "buffer.h"
4ed46869 42
04c2f2c5 43/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 44
3263d5a2 45 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 46 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
4ed46869 50
3263d5a2
KH
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
04c2f2c5 54 charset_table as a struct charset.
4ed46869 55
3263d5a2 56*/
4ed46869 57
3263d5a2
KH
58/* List of all charsets. This variable is used only from Emacs
59 Lisp. */
4ed46869 60Lisp_Object Vcharset_list;
4ed46869 61
3263d5a2
KH
62/* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64Lisp_Object Vcharset_hash_table;
4ed46869 65
3263d5a2
KH
66/* Table of struct charset. */
67struct charset *charset_table;
4ed46869 68
3263d5a2 69static int charset_table_size;
5af5dd92 70static int charset_table_used;
4ed46869 71
3263d5a2 72Lisp_Object Qcharsetp;
4ed46869 73
3263d5a2
KH
74/* Special charset symbols. */
75Lisp_Object Qascii;
2fe1edd1 76Lisp_Object Qeight_bit;
3263d5a2
KH
77Lisp_Object Qiso_8859_1;
78Lisp_Object Qunicode;
6c652beb 79Lisp_Object Qemacs;
4ed46869 80
3263d5a2
KH
81/* The corresponding charsets. */
82int charset_ascii;
2fe1edd1 83int charset_eight_bit;
3263d5a2
KH
84int charset_iso_8859_1;
85int charset_unicode;
6c652beb 86int charset_emacs;
b0e3cf2b 87
7c7dceee
KH
88/* The other special charsets. */
89int charset_jisx0201_roman;
90int charset_jisx0208_1978;
91int charset_jisx0208;
d32320c4 92int charset_ksc5601;
c1a08b4c 93
3263d5a2
KH
94/* Value of charset attribute `charset-iso-plane'. */
95Lisp_Object Qgl, Qgr;
c1a08b4c 96
d1a04588
KH
97/* Charset of unibyte characters. */
98int charset_unibyte;
4ed46869 99
3263d5a2
KH
100/* List of charsets ordered by the priority. */
101Lisp_Object Vcharset_ordered_list;
4ed46869 102
6a9c90ec
KH
103/* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 charsets. */
105Lisp_Object Vcharset_non_preferred_head;
106
dbbb237d 107/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 108 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
109 -1. */
110unsigned short charset_ordered_list_tick;
4ed46869 111
3263d5a2
KH
112/* List of iso-2022 charsets. */
113Lisp_Object Viso_2022_charset_list;
35e623fb 114
3263d5a2
KH
115/* List of emacs-mule charsets. */
116Lisp_Object Vemacs_mule_charset_list;
117
118struct charset *emacs_mule_charset[256];
4ed46869
KH
119
120/* Mapping table from ISO2022's charset (specified by DIMENSION,
121 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
122int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
123
4beef065 124Lisp_Object Vcharset_map_path;
3263d5a2 125
ecca2aad
KH
126/* If nonzero, don't load charset maps. */
127int inhibit_load_charset_map;
3263d5a2 128
6a9c90ec
KH
129Lisp_Object Vcurrent_iso639_language;
130
69f8de5b
KH
131#define CODE_POINT_TO_INDEX(charset, code) \
132 ((charset)->code_linear_p \
133 ? (code) - (charset)->min_code \
134 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
135 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
136 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
137 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
138 ? (((((code) >> 24) - (charset)->code_space[12]) \
139 * (charset)->code_space[11]) \
140 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
141 * (charset)->code_space[7]) \
142 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
143 * (charset)->code_space[3]) \
820ee249
KH
144 + (((code) & 0xFF) - (charset)->code_space[0]) \
145 - ((charset)->char_index_offset)) \
3263d5a2
KH
146 : -1)
147
148
149/* Convert the character index IDX to code-point CODE for CHARSET.
150 It is assumed that IDX is in a valid range. */
151
820ee249
KH
152#define INDEX_TO_CODE_POINT(charset, idx) \
153 ((charset)->code_linear_p \
154 ? (idx) + (charset)->min_code \
155 : (idx += (charset)->char_index_offset, \
156 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
157 | (((charset)->code_space[4] \
158 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
159 << 8) \
160 | (((charset)->code_space[8] \
161 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
162 << 16) \
163 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 << 24))))
4cf9710d 165
ecca2aad
KH
166/* Structure to hold mapping tables for a charset. Used by temacs
167 invoked for dumping. */
8a73a704 168
ecca2aad
KH
169static struct
170{
171 /* The current charset for which the following tables are setup. */
172 struct charset *current;
173
174 /* 1 iff the following table is used for encoder. */
175 short for_encoder;
176
177 /* When the following table is used for encoding, mininum and
178 maxinum character of the current charset. */
179 int min_char, max_char;
180
181 /* A Unicode character correspoinding to the code indice 0 (i.e. the
182 minimum code-point) of the current charset, or -1 if the code
183 indice 0 is not a Unicode character. This is checked when
184 table.encoder[CHAR] is zero. */
185 int zero_index_char;
186
187 union {
188 /* Table mapping code-indices (not code-points) of the current
189 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
190 doesn't belong to the current charset. */
191 int decoder[0x10000];
192 /* Table mapping Unicode characters to code-indices of the current
193 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
194 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
195 (0x20000..0x2FFFF). Note that there is no charset map that
196 uses both SMP and SIP. */
197 unsigned short encoder[0x20000];
198 } table;
199} *temp_charset_work;
200
201#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
202 do { \
203 if ((CODE) == 0) \
204 temp_charset_work->zero_index_char = (C); \
205 else if ((C) < 0x20000) \
206 temp_charset_work->table.encoder[(C)] = (CODE); \
207 else \
208 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
209 } while (0)
210
211#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
212 ((C) == temp_charset_work->zero_index_char ? 0 \
213 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
214 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
215 : temp_charset_work->table.encoder[(C) - 0x10000] \
216 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
217
218#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
219 (temp_charset_work->table.decoder[(CODE)] = (C))
220
221#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
222 (temp_charset_work->table.decoder[(CODE)])
046b1f03 223\f
93bcb785 224
e9ce014c
KH
225/* Set to 1 to warn that a charset map is loaded and thus a buffer
226 text and a string data may be relocated. */
3263d5a2 227int charset_map_loaded;
4ed46869 228
e9ce014c 229struct charset_map_entries
4ed46869 230{
e9ce014c
KH
231 struct {
232 unsigned from, to;
233 int c;
234 } entry[0x10000];
235 struct charset_map_entries *next;
236};
237
ecca2aad
KH
238/* Load the mapping information of CHARSET from ENTRIES for
239 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
240 encoding (CONTROL_FLAG == 2).
241
242 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
243 and CHARSET->fast_map.
244
245 If CONTROL_FLAG is 1, setup the following tables according to
246 CHARSET->method and inhibit_load_charset_map.
4cf9710d 247
ecca2aad
KH
248 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
249 ----------------------+--------------------+---------------------------
250 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
251 ----------------------+--------------------+---------------------------
252 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 253
ecca2aad 254 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 255
ecca2aad
KH
256 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
259 ----------------------+--------------------+--------------------------
260 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
261*/
4ed46869 262
3263d5a2 263static void
971de7fb 264load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 265{
3263d5a2 266 Lisp_Object vec, table;
3263d5a2
KH
267 unsigned max_code = CHARSET_MAX_CODE (charset);
268 int ascii_compatible_p = charset->ascii_compatible_p;
269 int min_char, max_char, nonascii_min_char;
3263d5a2 270 int i;
3263d5a2 271 unsigned char *fast_map = charset->fast_map;
99529c2c 272
e9ce014c
KH
273 if (n_entries <= 0)
274 return;
275
ecca2aad 276 if (control_flag)
8ac5a9cc 277 {
ecca2aad
KH
278 if (! inhibit_load_charset_map)
279 {
280 if (control_flag == 1)
281 {
282 if (charset->method == CHARSET_METHOD_MAP)
283 {
284 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 285
ecca2aad
KH
286 vec = CHARSET_DECODER (charset)
287 = Fmake_vector (make_number (n), make_number (-1));
288 }
289 else
290 {
291 char_table_set_range (Vchar_unify_table,
292 charset->min_char, charset->max_char,
293 Qnil);
294 }
295 }
296 else
297 {
298 table = Fmake_char_table (Qnil, Qnil);
299 if (charset->method == CHARSET_METHOD_MAP)
300 CHARSET_ENCODER (charset) = table;
301 else
302 CHARSET_DEUNIFIER (charset) = table;
303 }
304 }
305 else
306 {
307 if (! temp_charset_work)
308 temp_charset_work = malloc (sizeof (*temp_charset_work));
309 if (control_flag == 1)
310 {
311 memset (temp_charset_work->table.decoder, -1,
312 sizeof (int) * 0x10000);
ecca2aad
KH
313 }
314 else
315 {
316 memset (temp_charset_work->table.encoder, 0,
317 sizeof (unsigned short) * 0x20000);
318 temp_charset_work->zero_index_char = -1;
319 }
320 temp_charset_work->current = charset;
321 temp_charset_work->for_encoder = (control_flag == 2);
322 control_flag += 2;
323 }
3263d5a2 324 charset_map_loaded = 1;
2e344af3 325 }
6662e69b 326
e9ce014c 327 min_char = max_char = entries->entry[0].c;
3263d5a2 328 nonascii_min_char = MAX_CHAR;
e9ce014c 329 for (i = 0; i < n_entries; i++)
2e344af3 330 {
e9ce014c 331 unsigned from, to;
3b4f4446
KH
332 int from_index, to_index;
333 int from_c, to_c;
e9ce014c 334 int idx = i % 0x10000;
3263d5a2 335
e9ce014c
KH
336 if (i > 0 && idx == 0)
337 entries = entries->next;
338 from = entries->entry[idx].from;
339 to = entries->entry[idx].to;
3b4f4446
KH
340 from_c = entries->entry[idx].c;
341 from_index = CODE_POINT_TO_INDEX (charset, from);
342 if (from == to)
6662e69b 343 {
3b4f4446
KH
344 to_index = from_index;
345 to_c = from_c;
6662e69b 346 }
3b4f4446 347 else
6662e69b 348 {
3b4f4446
KH
349 to_index = CODE_POINT_TO_INDEX (charset, to);
350 to_c = from_c + (to_index - from_index);
6662e69b 351 }
3b4f4446
KH
352 if (from_index < 0 || to_index < 0)
353 continue;
3263d5a2 354
ecca2aad
KH
355 if (to_c > max_char)
356 max_char = to_c;
357 else if (from_c < min_char)
358 min_char = from_c;
6662e69b 359
ecca2aad
KH
360 if (control_flag == 1)
361 {
362 if (charset->method == CHARSET_METHOD_MAP)
363 for (; from_index <= to_index; from_index++, from_c++)
364 ASET (vec, from_index, make_number (from_c));
365 else
366 for (; from_index <= to_index; from_index++, from_c++)
367 CHAR_TABLE_SET (Vchar_unify_table,
368 CHARSET_CODE_OFFSET (charset) + from_index,
369 make_number (from_c));
370 }
371 else if (control_flag == 2)
372 {
373 if (charset->method == CHARSET_METHOD_MAP
374 && CHARSET_COMPACT_CODES_P (charset))
375 for (; from_index <= to_index; from_index++, from_c++)
376 {
377 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
378
379 if (NILP (CHAR_TABLE_REF (table, from_c)))
380 CHAR_TABLE_SET (table, from_c, make_number (code));
381 }
382 else
383 for (; from_index <= to_index; from_index++, from_c++)
384 {
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (from_index));
387 }
388 }
389 else if (control_flag == 3)
390 for (; from_index <= to_index; from_index++, from_c++)
391 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
392 else if (control_flag == 4)
393 for (; from_index <= to_index; from_index++, from_c++)
394 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
395 else /* control_flag == 0 */
396 {
3b4f4446
KH
397 if (ascii_compatible_p)
398 {
399 if (! ASCII_BYTE_P (from_c))
400 {
401 if (from_c < nonascii_min_char)
402 nonascii_min_char = from_c;
403 }
404 else if (! ASCII_BYTE_P (to_c))
405 {
406 nonascii_min_char = 0x80;
407 }
408 }
177c0ea7 409
ecca2aad
KH
410 for (; from_c <= to_c; from_c++)
411 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 412 }
8ac5a9cc 413 }
3263d5a2 414
ecca2aad 415 if (control_flag == 0)
4ed46869 416 {
3263d5a2
KH
417 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
418 ? nonascii_min_char : min_char);
419 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 420 }
ecca2aad
KH
421 else if (control_flag == 4)
422 {
423 temp_charset_work->min_char = min_char;
424 temp_charset_work->max_char = max_char;
425 }
4ed46869
KH
426}
427
12bcae05 428
3263d5a2 429/* Read a hexadecimal number (preceded by "0x") from the file FP while
3b59c351 430 paying attention to comment character '#'. */
12bcae05 431
3263d5a2 432static INLINE unsigned
971de7fb 433read_hex (FILE *fp, int *eof)
12bcae05 434{
3263d5a2
KH
435 int c;
436 unsigned n;
12bcae05 437
3263d5a2
KH
438 while ((c = getc (fp)) != EOF)
439 {
69f8de5b 440 if (c == '#')
3263d5a2
KH
441 {
442 while ((c = getc (fp)) != EOF && c != '\n');
443 }
444 else if (c == '0')
445 {
446 if ((c = getc (fp)) == EOF || c == 'x')
447 break;
448 }
8f924df7 449 }
3263d5a2
KH
450 if (c == EOF)
451 {
452 *eof = 1;
453 return 0;
454 }
455 *eof = 0;
456 n = 0;
457 if (c == 'x')
458 while ((c = getc (fp)) != EOF && isxdigit (c))
459 n = ((n << 4)
460 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
461 else
462 while ((c = getc (fp)) != EOF && isdigit (c))
463 n = (n * 10) + c - '0';
e9ce014c
KH
464 if (c != EOF)
465 ungetc (c, fp);
3263d5a2
KH
466 return n;
467}
12bcae05 468
3263d5a2 469/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
470 Each line of MAPFILE has this form
471 0xAAAA 0xCCCC
472 where 0xAAAA is a code-point and 0xCCCC is the corresponding
473 character code, or this form
474 0xAAAA-0xBBBB 0xCCCC
475 where 0xAAAA and 0xBBBB are code-points specifying a range, and
476 0xCCCC is the first character code of the range.
4ed46869 477
3263d5a2
KH
478 The returned vector has this form:
479 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 480 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
481 range.
482
18a10a21
JB
483 Note that this function uses `openp' to open MAPFILE but ignores
484 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 485
e9ce014c 486static void
971de7fb 487load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
4ed46869 488{
e9ce014c
KH
489 unsigned min_code = CHARSET_MIN_CODE (charset);
490 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
491 int fd;
492 FILE *fp;
3263d5a2
KH
493 int eof;
494 Lisp_Object suffixes;
e9ce014c 495 struct charset_map_entries *head, *entries;
a2f3eb19
CY
496 int n_entries, count;
497 USE_SAFE_ALLOCA;
4ed46869 498
3263d5a2
KH
499 suffixes = Fcons (build_string (".map"),
500 Fcons (build_string (".TXT"), Qnil));
4ed46869 501
a2f3eb19 502 count = SPECPDL_INDEX ();
dde2559c 503 specbind (Qfile_name_handler_alist, Qnil);
4beef065 504 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 505 unbind_to (count, Qnil);
3263d5a2
KH
506 if (fd < 0
507 || ! (fp = fdopen (fd, "r")))
8458d4c1 508 error ("Failure in loading charset map: %S", SDATA (mapfile));
4ed46869 509
a2f3eb19
CY
510 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
511 large (larger than MAX_ALLOCA). */
512 SAFE_ALLOCA (head, struct charset_map_entries *,
513 sizeof (struct charset_map_entries));
514 entries = head;
72af86bd 515 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 516
e9ce014c 517 n_entries = 0;
3263d5a2
KH
518 eof = 0;
519 while (1)
520 {
e9ce014c
KH
521 unsigned from, to;
522 int c;
523 int idx;
4ed46869 524
e9ce014c 525 from = read_hex (fp, &eof);
3263d5a2
KH
526 if (eof)
527 break;
e9ce014c
KH
528 if (getc (fp) == '-')
529 to = read_hex (fp, &eof);
530 else
531 to = from;
532 c = (int) read_hex (fp, &eof);
ac4137cc 533
e9ce014c
KH
534 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
535 continue;
ac4137cc 536
e9ce014c 537 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 538 {
a2f3eb19
CY
539 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
540 sizeof (struct charset_map_entries));
e9ce014c 541 entries = entries->next;
72af86bd 542 memset (entries, 0, sizeof (struct charset_map_entries));
3263d5a2 543 }
e9ce014c
KH
544 idx = n_entries % 0x10000;
545 entries->entry[idx].from = from;
546 entries->entry[idx].to = to;
547 entries->entry[idx].c = c;
548 n_entries++;
3263d5a2
KH
549 }
550 fclose (fp);
177c0ea7 551
e9ce014c 552 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 553 SAFE_FREE ();
4ed46869
KH
554}
555
e9ce014c 556static void
971de7fb 557load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 558{
e9ce014c
KH
559 unsigned min_code = CHARSET_MIN_CODE (charset);
560 unsigned max_code = CHARSET_MAX_CODE (charset);
561 struct charset_map_entries *head, *entries;
562 int n_entries;
563 int len = ASIZE (vec);
564 int i;
a2f3eb19 565 USE_SAFE_ALLOCA;
23d2a7f1 566
e9ce014c 567 if (len % 2 == 1)
3263d5a2 568 {
e9ce014c
KH
569 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
570 return;
3263d5a2 571 }
35e623fb 572
a2f3eb19
CY
573 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
574 large (larger than MAX_ALLOCA). */
575 SAFE_ALLOCA (head, struct charset_map_entries *,
576 sizeof (struct charset_map_entries));
577 entries = head;
72af86bd 578 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 579
e9ce014c
KH
580 n_entries = 0;
581 for (i = 0; i < len; i += 2)
35e623fb 582 {
e9ce014c
KH
583 Lisp_Object val, val2;
584 unsigned from, to;
585 int c;
586 int idx;
d2665018 587
e9ce014c
KH
588 val = AREF (vec, i);
589 if (CONSP (val))
bbf12bb3 590 {
e9ce014c
KH
591 val2 = XCDR (val);
592 val = XCAR (val);
593 CHECK_NATNUM (val);
594 CHECK_NATNUM (val2);
595 from = XFASTINT (val);
596 to = XFASTINT (val2);
bbf12bb3 597 }
e9ce014c 598 else
bbf12bb3 599 {
e9ce014c
KH
600 CHECK_NATNUM (val);
601 from = to = XFASTINT (val);
bbf12bb3 602 }
e9ce014c
KH
603 val = AREF (vec, i + 1);
604 CHECK_NATNUM (val);
605 c = XFASTINT (val);
76d7b829 606
e9ce014c
KH
607 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
608 continue;
76d7b829 609
dcc694d7 610 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 611 {
a2f3eb19
CY
612 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
613 sizeof (struct charset_map_entries));
e9ce014c 614 entries = entries->next;
72af86bd 615 memset (entries, 0, sizeof (struct charset_map_entries));
e9ce014c
KH
616 }
617 idx = n_entries % 0x10000;
618 entries->entry[idx].from = from;
619 entries->entry[idx].to = to;
620 entries->entry[idx].c = c;
621 n_entries++;
622 }
76d7b829 623
e9ce014c 624 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 625 SAFE_FREE ();
ac4137cc
KH
626}
627
ecca2aad
KH
628
629/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
630 map it is (see the comment of load_charset_map for the detail). */
631
3263d5a2 632static void
971de7fb 633load_charset (struct charset *charset, int control_flag)
76d7b829 634{
ecca2aad 635 Lisp_Object map;
76d7b829 636
ecca2aad
KH
637 if (inhibit_load_charset_map
638 && temp_charset_work
639 && charset == temp_charset_work->current
56f00ed2 640 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
641 return;
642
643 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
644 map = CHARSET_MAP (charset);
645 else if (CHARSET_UNIFIED_P (charset))
646 map = CHARSET_UNIFY_MAP (charset);
647 if (STRINGP (map))
648 load_charset_map_from_file (charset, map, control_flag);
649 else
650 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 651}
76d7b829 652
3263d5a2
KH
653
654DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
655 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
5842a27b 656 (Lisp_Object object)
23d2a7f1 657{
3263d5a2 658 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
659}
660
4ed46869 661
f57e2426
J
662void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
663 Lisp_Object function, Lisp_Object arg,
664 unsigned from, unsigned to);
ecca2aad
KH
665
666void
971de7fb 667map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
ecca2aad
KH
668{
669 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
670 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
671 Lisp_Object range;
672 int c, stop;
673 struct gcpro gcpro1;
674
675 range = Fcons (Qnil, Qnil);
676 GCPRO1 (range);
677
678 c = temp_charset_work->min_char;
679 stop = (temp_charset_work->max_char < 0x20000
680 ? temp_charset_work->max_char : 0xFFFF);
681
682 while (1)
683 {
684 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
685
686 if (index >= from_idx && index <= to_idx)
687 {
688 if (NILP (XCAR (range)))
689 XSETCAR (range, make_number (c));
690 }
691 else if (! NILP (XCAR (range)))
692 {
693 XSETCDR (range, make_number (c - 1));
694 if (c_function)
695 (*c_function) (arg, range);
696 else
697 call2 (function, range, arg);
698 XSETCAR (range, Qnil);
699 }
700 if (c == stop)
701 {
702 if (c == temp_charset_work->max_char)
703 {
704 if (! NILP (XCAR (range)))
705 {
706 XSETCDR (range, make_number (c));
707 if (c_function)
708 (*c_function) (arg, range);
709 else
710 call2 (function, range, arg);
711 }
712 break;
713 }
714 c = 0x1FFFF;
715 stop = temp_charset_work->max_char;
716 }
717 c++;
718 }
c542407d 719 UNGCPRO;
ecca2aad
KH
720}
721
4ed46869 722void
6f704c76
DN
723map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
724 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
4ed46869 725{
3263d5a2 726 Lisp_Object range;
374c5cfd 727 int partial;
3263d5a2 728
374c5cfd
KH
729 partial = (from > CHARSET_MIN_CODE (charset)
730 || to < CHARSET_MAX_CODE (charset));
731
3263d5a2 732 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 733 {
374c5cfd
KH
734 int from_idx = CODE_POINT_TO_INDEX (charset, from);
735 int to_idx = CODE_POINT_TO_INDEX (charset, to);
736 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
737 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
738
ecca2aad
KH
739 if (CHARSET_UNIFIED_P (charset))
740 {
741 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
742 load_charset (charset, 2);
743 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
744 map_char_table_for_charset (c_function, function,
745 CHARSET_DEUNIFIER (charset), arg,
746 partial ? charset : NULL, from, to);
747 else
748 map_charset_for_dump (c_function, function, arg, from, to);
749 }
750
374c5cfd 751 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 752 if (NILP (function))
5af5dd92 753 (*c_function) (arg, range);
3263d5a2
KH
754 else
755 call2 (function, range, arg);
c83ef371 756 }
3263d5a2
KH
757 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
758 {
759 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
760 load_charset (charset, 2);
761 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
762 map_char_table_for_charset (c_function, function,
763 CHARSET_ENCODER (charset), arg,
764 partial ? charset : NULL, from, to);
765 else
766 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 767 }
374c5cfd 768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 769 {
374c5cfd
KH
770 Lisp_Object subset_info;
771 int offset;
772
773 subset_info = CHARSET_SUBSET (charset);
774 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
775 offset = XINT (AREF (subset_info, 3));
776 from -= offset;
777 if (from < XFASTINT (AREF (subset_info, 1)))
778 from = XFASTINT (AREF (subset_info, 1));
779 to -= offset;
780 if (to > XFASTINT (AREF (subset_info, 2)))
781 to = XFASTINT (AREF (subset_info, 2));
782 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 783 }
374c5cfd
KH
784 else /* i.e. CHARSET_METHOD_SUPERSET */
785 {
786 Lisp_Object parents;
4ed46869 787
374c5cfd
KH
788 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
789 parents = XCDR (parents))
bbf12bb3 790 {
374c5cfd
KH
791 int offset;
792 unsigned this_from, this_to;
793
794 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
795 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
796 this_from = from > offset ? from - offset : 0;
797 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
798 if (this_from < CHARSET_MIN_CODE (charset))
799 this_from = CHARSET_MIN_CODE (charset);
800 if (this_to > CHARSET_MAX_CODE (charset))
801 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
802 map_charset_chars (c_function, function, arg, charset,
803 this_from, this_to);
bbf12bb3 804 }
35e623fb 805 }
4ed46869
KH
806}
807
374c5cfd 808DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 809 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 810FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 811argument ARG.
4ed46869 812
374c5cfd
KH
813RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
814characters contained in CHARSET.
4ed46869 815
374c5cfd 816The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 817range of code points (in CHARSET) of target characters. */)
5842a27b 818 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
4ed46869 819{
374c5cfd 820 struct charset *cs;
16fed1fc 821 unsigned from, to;
4ed46869 822
374c5cfd
KH
823 CHECK_CHARSET_GET_CHARSET (charset, cs);
824 if (NILP (from_code))
16fed1fc 825 from = CHARSET_MIN_CODE (cs);
970b7474 826 else
4ed46869 827 {
970b7474
KH
828 CHECK_NATNUM (from_code);
829 from = XINT (from_code);
830 if (from < CHARSET_MIN_CODE (cs))
831 from = CHARSET_MIN_CODE (cs);
4ed46869 832 }
374c5cfd 833 if (NILP (to_code))
970b7474 834 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
835 else
836 {
970b7474
KH
837 CHECK_NATNUM (to_code);
838 to = XINT (to_code);
839 if (to > CHARSET_MAX_CODE (cs))
840 to = CHARSET_MAX_CODE (cs);
4ed46869 841 }
16fed1fc 842 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 843 return Qnil;
35e623fb 844}
4ed46869 845
4ed46869 846
3263d5a2
KH
847/* Define a charset according to the arguments. The Nth argument is
848 the Nth attribute of the charset (the last attribute `charset-id'
849 is not included). See the docstring of `define-charset' for the
850 detail. */
4ed46869 851
3263d5a2
KH
852DEFUN ("define-charset-internal", Fdefine_charset_internal,
853 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
854 doc: /* For internal use only.
855usage: (define-charset-internal ...) */)
5842a27b 856 (int nargs, Lisp_Object *args)
4ed46869 857{
3263d5a2
KH
858 /* Charset attr vector. */
859 Lisp_Object attrs;
860 Lisp_Object val;
861 unsigned hash_code;
862 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 863 int i, j;
3263d5a2
KH
864 struct charset charset;
865 int id;
866 int dimension;
867 int new_definition_p;
868 int nchars;
869
870 if (nargs != charset_arg_max)
871 return Fsignal (Qwrong_number_of_arguments,
872 Fcons (intern ("define-charset-internal"),
873 make_number (nargs)));
874
875 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
876
877 CHECK_SYMBOL (args[charset_arg_name]);
878 ASET (attrs, charset_name, args[charset_arg_name]);
879
880 val = args[charset_arg_code_space];
881 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 882 {
3263d5a2
KH
883 int min_byte, max_byte;
884
885 min_byte = XINT (Faref (val, make_number (i * 2)));
886 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
887 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
888 error ("Invalid :code-space value");
889 charset.code_space[i * 4] = min_byte;
890 charset.code_space[i * 4 + 1] = max_byte;
891 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
892 nchars *= charset.code_space[i * 4 + 2];
893 charset.code_space[i * 4 + 3] = nchars;
894 if (max_byte > 0)
895 dimension = i + 1;
896 }
4ed46869 897
3263d5a2
KH
898 val = args[charset_arg_dimension];
899 if (NILP (val))
900 charset.dimension = dimension;
901 else
4ed46869 902 {
3263d5a2
KH
903 CHECK_NATNUM (val);
904 charset.dimension = XINT (val);
905 if (charset.dimension < 1 || charset.dimension > 4)
906 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
907 }
908
3263d5a2
KH
909 charset.code_linear_p
910 = (charset.dimension == 1
911 || (charset.code_space[2] == 256
912 && (charset.dimension == 2
913 || (charset.code_space[6] == 256
914 && (charset.dimension == 3
915 || charset.code_space[10] == 256)))));
916
69f8de5b 917 if (! charset.code_linear_p)
4ed46869 918 {
69f8de5b 919 charset.code_space_mask = (unsigned char *) xmalloc (256);
72af86bd 920 memset (charset.code_space_mask, 0, 256);
69f8de5b
KH
921 for (i = 0; i < 4; i++)
922 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
923 j++)
924 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
925 }
926
3263d5a2 927 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 928
3263d5a2
KH
929 charset.min_code = (charset.code_space[0]
930 | (charset.code_space[4] << 8)
931 | (charset.code_space[8] << 16)
932 | (charset.code_space[12] << 24));
933 charset.max_code = (charset.code_space[1]
934 | (charset.code_space[5] << 8)
935 | (charset.code_space[9] << 16)
936 | (charset.code_space[13] << 24));
820ee249 937 charset.char_index_offset = 0;
8a73a704 938
820ee249
KH
939 val = args[charset_arg_min_code];
940 if (! NILP (val))
941 {
942 unsigned code;
fdb82f93 943
820ee249
KH
944 if (INTEGERP (val))
945 code = XINT (val);
946 else
947 {
948 CHECK_CONS (val);
8f924df7
KH
949 CHECK_NUMBER_CAR (val);
950 CHECK_NUMBER_CDR (val);
820ee249
KH
951 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
952 }
953 if (code < charset.min_code
954 || code > charset.max_code)
955 args_out_of_range_3 (make_number (charset.min_code),
956 make_number (charset.max_code), val);
957 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
958 charset.min_code = code;
959 }
3fac5a51 960
820ee249
KH
961 val = args[charset_arg_max_code];
962 if (! NILP (val))
3fac5a51 963 {
820ee249
KH
964 unsigned code;
965
966 if (INTEGERP (val))
967 code = XINT (val);
968 else
969 {
970 CHECK_CONS (val);
8f924df7
KH
971 CHECK_NUMBER_CAR (val);
972 CHECK_NUMBER_CDR (val);
820ee249
KH
973 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
974 }
975 if (code < charset.min_code
976 || code > charset.max_code)
977 args_out_of_range_3 (make_number (charset.min_code),
978 make_number (charset.max_code), val);
979 charset.max_code = code;
3fac5a51 980 }
3fac5a51 981
ecca2aad 982 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 983
3263d5a2
KH
984 val = args[charset_arg_invalid_code];
985 if (NILP (val))
986 {
987 if (charset.min_code > 0)
988 charset.invalid_code = 0;
bbf12bb3
KH
989 else
990 {
3263d5a2
KH
991 XSETINT (val, charset.max_code + 1);
992 if (XINT (val) == charset.max_code + 1)
993 charset.invalid_code = charset.max_code + 1;
994 else
995 error ("Attribute :invalid-code must be specified");
76d7b829 996 }
76d7b829 997 }
3263d5a2
KH
998 else
999 {
1000 CHECK_NATNUM (val);
1001 charset.invalid_code = XFASTINT (val);
1002 }
4ed46869 1003
3263d5a2
KH
1004 val = args[charset_arg_iso_final];
1005 if (NILP (val))
1006 charset.iso_final = -1;
1007 else
1008 {
1009 CHECK_NUMBER (val);
1010 if (XINT (val) < '0' || XINT (val) > 127)
1011 error ("Invalid iso-final-char: %d", XINT (val));
1012 charset.iso_final = XINT (val);
1013 }
4ed46869 1014
3263d5a2
KH
1015 val = args[charset_arg_iso_revision];
1016 if (NILP (val))
1017 charset.iso_revision = -1;
1018 else
4ed46869 1019 {
3263d5a2
KH
1020 CHECK_NUMBER (val);
1021 if (XINT (val) > 63)
1022 args_out_of_range (make_number (63), val);
1023 charset.iso_revision = XINT (val);
4ed46869 1024 }
4ed46869 1025
3263d5a2
KH
1026 val = args[charset_arg_emacs_mule_id];
1027 if (NILP (val))
1028 charset.emacs_mule_id = -1;
4ed46869
KH
1029 else
1030 {
3263d5a2
KH
1031 CHECK_NATNUM (val);
1032 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1033 error ("Invalid emacs-mule-id: %d", XINT (val));
1034 charset.emacs_mule_id = XINT (val);
c83ef371 1035 }
f6302ac9 1036
3263d5a2 1037 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1038
3263d5a2 1039 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1040
3263d5a2
KH
1041 charset.unified_p = 0;
1042
72af86bd 1043 memset (charset.fast_map, 0, sizeof (charset.fast_map));
3263d5a2
KH
1044
1045 if (! NILP (args[charset_arg_code_offset]))
1046 {
1047 val = args[charset_arg_code_offset];
1048 CHECK_NUMBER (val);
1049
1050 charset.method = CHARSET_METHOD_OFFSET;
1051 charset.code_offset = XINT (val);
1052
1053 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1054 charset.min_char = i + charset.code_offset;
1055 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1056 charset.max_char = i + charset.code_offset;
1057 if (charset.max_char > MAX_CHAR)
1058 error ("Unsupported max char: %d", charset.max_char);
1059
f148205f
KH
1060 i = (charset.min_char >> 7) << 7;
1061 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1062 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1063 i = (i >> 12) << 12;
3263d5a2
KH
1064 for (; i <= charset.max_char; i += 0x1000)
1065 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1066 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1067 charset.ascii_compatible_p = 1;
3263d5a2
KH
1068 }
1069 else if (! NILP (args[charset_arg_map]))
1070 {
1071 val = args[charset_arg_map];
1072 ASET (attrs, charset_map, val);
ecca2aad 1073 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1074 }
374c5cfd 1075 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1076 {
374c5cfd
KH
1077 Lisp_Object parent;
1078 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1079 struct charset *parent_charset;
1080
1081 val = args[charset_arg_subset];
1082 parent = Fcar (val);
1083 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1084 parent_min_code = Fnth (make_number (1), val);
1085 CHECK_NATNUM (parent_min_code);
1086 parent_max_code = Fnth (make_number (2), val);
1087 CHECK_NATNUM (parent_max_code);
1088 parent_code_offset = Fnth (make_number (3), val);
1089 CHECK_NUMBER (parent_code_offset);
1090 val = Fmake_vector (make_number (4), Qnil);
1091 ASET (val, 0, make_number (parent_charset->id));
1092 ASET (val, 1, parent_min_code);
1093 ASET (val, 2, parent_max_code);
1094 ASET (val, 3, parent_code_offset);
1095 ASET (attrs, charset_subset, val);
1096
1097 charset.method = CHARSET_METHOD_SUBSET;
1098 /* Here, we just copy the parent's fast_map. It's not accurate,
1099 but at least it works for quickly detecting which character
1100 DOESN'T belong to this charset. */
1101 for (i = 0; i < 190; i++)
1102 charset.fast_map[i] = parent_charset->fast_map[i];
1103
1104 /* We also copy these for parents. */
1105 charset.min_char = parent_charset->min_char;
1106 charset.max_char = parent_charset->max_char;
1107 }
1108 else if (! NILP (args[charset_arg_superset]))
0282eb69 1109 {
374c5cfd
KH
1110 val = args[charset_arg_superset];
1111 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1112 val = Fcopy_sequence (val);
374c5cfd 1113 ASET (attrs, charset_superset, val);
3263d5a2
KH
1114
1115 charset.min_char = MAX_CHAR;
1116 charset.max_char = 0;
1117 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1118 {
3263d5a2
KH
1119 Lisp_Object elt, car_part, cdr_part;
1120 int this_id, offset;
1121 struct charset *this_charset;
2e344af3 1122
3263d5a2
KH
1123 elt = Fcar (val);
1124 if (CONSP (elt))
2e344af3 1125 {
3263d5a2
KH
1126 car_part = XCAR (elt);
1127 cdr_part = XCDR (elt);
1128 CHECK_CHARSET_GET_ID (car_part, this_id);
1129 CHECK_NUMBER (cdr_part);
1130 offset = XINT (cdr_part);
177c0ea7 1131 }
3263d5a2 1132 else
4ed46869 1133 {
3263d5a2
KH
1134 CHECK_CHARSET_GET_ID (elt, this_id);
1135 offset = 0;
4ed46869 1136 }
3263d5a2
KH
1137 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1138
1139 this_charset = CHARSET_FROM_ID (this_id);
1140 if (charset.min_char > this_charset->min_char)
1141 charset.min_char = this_charset->min_char;
1142 if (charset.max_char < this_charset->max_char)
1143 charset.max_char = this_charset->max_char;
1144 for (i = 0; i < 190; i++)
1145 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1146 }
0282eb69 1147 }
2e344af3 1148 else
3263d5a2 1149 error ("None of :code-offset, :map, :parents are specified");
05505664 1150
3263d5a2
KH
1151 val = args[charset_arg_unify_map];
1152 if (! NILP (val) && !STRINGP (val))
1153 CHECK_VECTOR (val);
1154 ASET (attrs, charset_unify_map, val);
05505664 1155
3263d5a2
KH
1156 CHECK_LIST (args[charset_arg_plist]);
1157 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1158
3263d5a2
KH
1159 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1160 &hash_code);
1161 if (charset.hash_index >= 0)
1162 {
1163 new_definition_p = 0;
4f65af01 1164 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1165 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1166 }
1a45ff10 1167 else
3263d5a2
KH
1168 {
1169 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1170 hash_code);
1171 if (charset_table_used == charset_table_size)
1172 {
2fe1edd1
KH
1173 struct charset *new_table
1174 = (struct charset *) xmalloc (sizeof (struct charset)
1175 * (charset_table_size + 16));
72af86bd
AS
1176 memcpy (new_table, charset_table,
1177 sizeof (struct charset) * charset_table_size);
2fe1edd1
KH
1178 charset_table_size += 16;
1179 charset_table = new_table;
3263d5a2
KH
1180 }
1181 id = charset_table_used++;
3263d5a2
KH
1182 new_definition_p = 1;
1183 }
2e344af3 1184
4f65af01 1185 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1186 charset.id = id;
1187 charset_table[id] = charset;
2e344af3 1188
ecca2aad 1189 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1190 {
1191 load_charset (&charset, 0);
1192 charset_table[id] = charset;
1193 }
ecca2aad 1194
3263d5a2 1195 if (charset.iso_final >= 0)
4ed46869 1196 {
3263d5a2
KH
1197 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1198 charset.iso_final) = id;
1199 if (new_definition_p)
1200 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1201 Fcons (make_number (id), Qnil));
7c7dceee
KH
1202 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1203 charset_jisx0201_roman = id;
1204 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1205 charset_jisx0208_1978 = id;
1206 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1207 charset_jisx0208 = id;
d32320c4
KH
1208 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1209 charset_ksc5601 = id;
4ed46869 1210 }
d0cf2d48 1211
3263d5a2 1212 if (charset.emacs_mule_id >= 0)
4ed46869 1213 {
3263d5a2 1214 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1215 if (charset.emacs_mule_id < 0xA0)
1216 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1217 else
1218 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1219 if (new_definition_p)
1220 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1221 Fcons (make_number (id), Qnil));
4ed46869
KH
1222 }
1223
3263d5a2
KH
1224 if (new_definition_p)
1225 {
1226 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1227 if (charset.supplementary_p)
1228 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1229 Fcons (make_number (id), Qnil));
1230 else
880820fe
KH
1231 {
1232 Lisp_Object tail;
1233
1234 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1235 {
1236 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1237
1238 if (cs->supplementary_p)
1239 break;
1240 }
1241 if (EQ (tail, Vcharset_ordered_list))
1242 Vcharset_ordered_list = Fcons (make_number (id),
1243 Vcharset_ordered_list);
1244 else if (NILP (tail))
1245 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1246 Fcons (make_number (id), Qnil));
1247 else
1248 {
1249 val = Fcons (XCAR (tail), XCDR (tail));
1250 XSETCDR (tail, val);
1251 XSETCAR (tail, make_number (id));
1252 }
1253 }
dbbb237d 1254 charset_ordered_list_tick++;
3263d5a2 1255 }
4ed46869 1256
3263d5a2 1257 return Qnil;
4ed46869
KH
1258}
1259
2fe1edd1
KH
1260
1261/* Same as Fdefine_charset_internal but arguments are more convenient
1262 to call from C (typically in syms_of_charset). This can define a
1263 charset of `offset' method only. Return the ID of the new
1264 charset. */
1265
1266static int
dd4c5104
DN
1267define_charset_internal (Lisp_Object name,
1268 int dimension,
675e2c69 1269 const unsigned char *code_space,
dd4c5104
DN
1270 unsigned min_code, unsigned max_code,
1271 int iso_final, int iso_revision, int emacs_mule_id,
1272 int ascii_compatible, int supplementary,
1273 int code_offset)
2fe1edd1
KH
1274{
1275 Lisp_Object args[charset_arg_max];
1276 Lisp_Object plist[14];
1277 Lisp_Object val;
1278 int i;
1279
1280 args[charset_arg_name] = name;
1281 args[charset_arg_dimension] = make_number (dimension);
1282 val = Fmake_vector (make_number (8), make_number (0));
1283 for (i = 0; i < 8; i++)
1284 ASET (val, i, make_number (code_space[i]));
1285 args[charset_arg_code_space] = val;
1286 args[charset_arg_min_code] = make_number (min_code);
1287 args[charset_arg_max_code] = make_number (max_code);
1288 args[charset_arg_iso_final]
1289 = (iso_final < 0 ? Qnil : make_number (iso_final));
1290 args[charset_arg_iso_revision] = make_number (iso_revision);
1291 args[charset_arg_emacs_mule_id]
1292 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1293 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1294 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1295 args[charset_arg_invalid_code] = Qnil;
1296 args[charset_arg_code_offset] = make_number (code_offset);
1297 args[charset_arg_map] = Qnil;
1298 args[charset_arg_subset] = Qnil;
1299 args[charset_arg_superset] = Qnil;
1300 args[charset_arg_unify_map] = Qnil;
1301
d67b4f80 1302 plist[0] = intern_c_string (":name");
2fe1edd1 1303 plist[1] = args[charset_arg_name];
d67b4f80 1304 plist[2] = intern_c_string (":dimension");
2fe1edd1 1305 plist[3] = args[charset_arg_dimension];
d67b4f80 1306 plist[4] = intern_c_string (":code-space");
2fe1edd1 1307 plist[5] = args[charset_arg_code_space];
d67b4f80 1308 plist[6] = intern_c_string (":iso-final-char");
2fe1edd1 1309 plist[7] = args[charset_arg_iso_final];
d67b4f80 1310 plist[8] = intern_c_string (":emacs-mule-id");
2fe1edd1 1311 plist[9] = args[charset_arg_emacs_mule_id];
d67b4f80 1312 plist[10] = intern_c_string (":ascii-compatible-p");
2fe1edd1 1313 plist[11] = args[charset_arg_ascii_compatible_p];
d67b4f80 1314 plist[12] = intern_c_string (":code-offset");
2fe1edd1
KH
1315 plist[13] = args[charset_arg_code_offset];
1316
1317 args[charset_arg_plist] = Flist (14, plist);
1318 Fdefine_charset_internal (charset_arg_max, args);
1319
1320 return XINT (CHARSET_SYMBOL_ID (name));
1321}
1322
1323
3263d5a2
KH
1324DEFUN ("define-charset-alias", Fdefine_charset_alias,
1325 Sdefine_charset_alias, 2, 2, 0,
1326 doc: /* Define ALIAS as an alias for charset CHARSET. */)
5842a27b 1327 (Lisp_Object alias, Lisp_Object charset)
4ed46869 1328{
3263d5a2
KH
1329 Lisp_Object attr;
1330
1331 CHECK_CHARSET_GET_ATTR (charset, attr);
1332 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1333 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1334 return Qnil;
1335}
4ed46869 1336
4ed46869 1337
3263d5a2 1338DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1339 doc: /* Return the property list of CHARSET. */)
5842a27b 1340 (Lisp_Object charset)
3263d5a2
KH
1341{
1342 Lisp_Object attrs;
1343
1344 CHECK_CHARSET_GET_ATTR (charset, attrs);
1345 return CHARSET_ATTR_PLIST (attrs);
1346}
1347
1348
1349DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1350 doc: /* Set CHARSET's property list to PLIST. */)
5842a27b 1351 (Lisp_Object charset, Lisp_Object plist)
3263d5a2
KH
1352{
1353 Lisp_Object attrs;
1354
1355 CHECK_CHARSET_GET_ATTR (charset, attrs);
1356 CHARSET_ATTR_PLIST (attrs) = plist;
1357 return plist;
1358}
1359
1360
dbbb237d 1361DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1362 doc: /* Unify characters of CHARSET with Unicode.
1363This means reading the relevant file and installing the table defined
dbbb237d
KH
1364by CHARSET's `:unify-map' property.
1365
64165ae2
DL
1366Optional second arg UNIFY-MAP is a file name string or a vector. It has
1367the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1368`define-charset' (which see).
1369
1370Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
5842a27b 1371 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
8a73a704 1372{
3263d5a2
KH
1373 int id;
1374 struct charset *cs;
8f924df7 1375
3263d5a2
KH
1376 CHECK_CHARSET_GET_ID (charset, id);
1377 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1378 if (NILP (deunify)
1379 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1380 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1381 return Qnil;
dbbb237d 1382
3263d5a2 1383 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1384 if (NILP (deunify))
1385 {
ecca2aad
KH
1386 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1387 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1388 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1389 if (NILP (unify_map))
1390 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1391 else
ecca2aad
KH
1392 {
1393 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1394 signal_error ("Bad unify-map", unify_map);
1395 CHARSET_UNIFY_MAP (cs) = unify_map;
1396 }
1397 if (NILP (Vchar_unify_table))
1398 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1399 char_table_set_range (Vchar_unify_table,
1400 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1401 CHARSET_UNIFIED_P (cs) = 1;
1402 }
1403 else if (CHAR_TABLE_P (Vchar_unify_table))
1404 {
1405 int min_code = CHARSET_MIN_CODE (cs);
1406 int max_code = CHARSET_MAX_CODE (cs);
1407 int min_char = DECODE_CHAR (cs, min_code);
1408 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1409
dbbb237d
KH
1410 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1411 }
8f924df7 1412
3263d5a2 1413 return Qnil;
8a73a704
KH
1414}
1415
3fac5a51
KH
1416DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1417 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1418 doc: /*
d0cf2d48 1419Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1420DIMENSION is the number of bytes to represent a character: 1 or 2.
1421CHARS is the number of characters in a dimension: 94 or 96.
1422
1423This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1424If there's no unused final char for the specified kind of charset,
fdb82f93 1425return nil. */)
5842a27b 1426 (Lisp_Object dimension, Lisp_Object chars)
3fac5a51
KH
1427{
1428 int final_char;
1429
b7826503
PJ
1430 CHECK_NUMBER (dimension);
1431 CHECK_NUMBER (chars);
3263d5a2
KH
1432 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1433 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1434 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1435 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1436 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1437 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1438 break;
3fac5a51
KH
1439 return (final_char <= '?' ? make_number (final_char) : Qnil);
1440}
1441
3263d5a2 1442static void
971de7fb 1443check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1444{
3263d5a2
KH
1445 CHECK_NATNUM (dimension);
1446 CHECK_NATNUM (chars);
1447 CHECK_NATNUM (final_char);
4ed46869 1448
3263d5a2
KH
1449 if (XINT (dimension) > 3)
1450 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1451 if (XINT (chars) != 94 && XINT (chars) != 96)
1452 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1453 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1454 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1455}
1456
1457
1458DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1459 4, 4, 0,
cefd8c4f
KH
1460 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1461
1462On decoding by an ISO-2022 base coding system, when a charset
1463specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1464if CHARSET is designated instead. */)
5842a27b 1465 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
3263d5a2
KH
1466{
1467 int id;
82215ce9 1468 int chars_flag;
4ed46869 1469
3263d5a2
KH
1470 CHECK_CHARSET_GET_ID (charset, id);
1471 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1472 chars_flag = XINT (chars) == 96;
1473 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1474 return Qnil;
1475}
1476
3263d5a2 1477
2e344af3
KH
1478/* Return information about charsets in the text at PTR of NBYTES
1479 bytes, which are NCHARS characters. The value is:
f6302ac9 1480
cfe34140 1481 0: Each character is represented by one byte. This is always
3263d5a2
KH
1482 true for a unibyte string. For a multibyte string, true if
1483 it contains only ASCII characters.
1484
28c026cd
DL
1485 1: No charsets other than ascii, control-1, and latin-1 are
1486 found.
1d67c29b 1487
3263d5a2
KH
1488 2: Otherwise.
1489*/
4ed46869
KH
1490
1491int
971de7fb 1492string_xstring_p (Lisp_Object string)
4ed46869 1493{
8f924df7
KH
1494 const unsigned char *p = SDATA (string);
1495 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1496
8f924df7 1497 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1498 return 0;
1499
3263d5a2 1500 while (p < endp)
0282eb69 1501 {
3263d5a2 1502 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1503
3cc67a4d 1504 if (c >= 0x100)
3263d5a2 1505 return 2;
0282eb69 1506 }
3263d5a2
KH
1507 return 1;
1508}
05505664 1509
05505664 1510
3263d5a2 1511/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1512
3cc67a4d
KH
1513 CHARSETS is a vector. If Nth element is non-nil, it means the
1514 charset whose id is N is already found.
2e344af3 1515
3263d5a2 1516 It may lookup a translation table TABLE if supplied. */
2e344af3 1517
3263d5a2 1518static void
971de7fb 1519find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
3263d5a2 1520{
dbbb237d 1521 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1522
1523 if (nchars == nbytes)
3263d5a2 1524 {
3cc67a4d
KH
1525 if (multibyte)
1526 ASET (charsets, charset_ascii, Qt);
1527 else
1528 while (ptr < pend)
1529 {
1530 int c = *ptr++;
1531
1532 if (!NILP (table))
1533 c = translate_char (table, c);
1534 if (ASCII_BYTE_P (c))
1535 ASET (charsets, charset_ascii, Qt);
1536 else
1537 ASET (charsets, charset_eight_bit, Qt);
1538 }
1539 }
1540 else
1541 {
1542 while (ptr < pend)
3263d5a2 1543 {
3cc67a4d
KH
1544 int c = STRING_CHAR_ADVANCE (ptr);
1545 struct charset *charset;
3263d5a2 1546
3cc67a4d
KH
1547 if (!NILP (table))
1548 c = translate_char (table, c);
1549 charset = CHAR_CHARSET (c);
1550 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1551 }
4ed46869 1552 }
4ed46869
KH
1553}
1554
1555DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1556 2, 3, 0,
fdb82f93
PJ
1557 doc: /* Return a list of charsets in the region between BEG and END.
1558BEG and END are buffer positions.
1559Optional arg TABLE if non-nil is a translation table to look up.
1560
fdb82f93
PJ
1561If the current buffer is unibyte, the returned list may contain
1562only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1563 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
4ed46869 1564{
3263d5a2 1565 Lisp_Object charsets;
42ca828e
DL
1566 EMACS_INT from, from_byte, to, stop, stop_byte;
1567 int i;
4ed46869 1568 Lisp_Object val;
3cc67a4d 1569 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1570
1571 validate_region (&beg, &end);
1572 from = XFASTINT (beg);
1573 stop = to = XFASTINT (end);
6ae1f27e 1574
4ed46869 1575 if (from < GPT && GPT < to)
6ae1f27e
RS
1576 {
1577 stop = GPT;
1578 stop_byte = GPT_BYTE;
1579 }
1580 else
1581 stop_byte = CHAR_TO_BYTE (stop);
1582
1583 from_byte = CHAR_TO_BYTE (from);
1584
3263d5a2 1585 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1586 while (1)
1587 {
3263d5a2 1588 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1589 stop_byte - from_byte, charsets, table,
1590 multibyte);
4ed46869 1591 if (stop < to)
6ae1f27e
RS
1592 {
1593 from = stop, from_byte = stop_byte;
1594 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1595 }
4ed46869
KH
1596 else
1597 break;
1598 }
6ae1f27e 1599
4ed46869 1600 val = Qnil;
3263d5a2 1601 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1602 if (!NILP (AREF (charsets, i)))
3263d5a2 1603 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1604 return val;
1605}
1606
1607DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1608 1, 2, 0,
fdb82f93
PJ
1609 doc: /* Return a list of charsets in STR.
1610Optional arg TABLE if non-nil is a translation table to look up.
1611
fdb82f93 1612If STR is unibyte, the returned list may contain
3263d5a2 1613only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1614 (Lisp_Object str, Lisp_Object table)
4ed46869 1615{
3263d5a2 1616 Lisp_Object charsets;
4ed46869
KH
1617 int i;
1618 Lisp_Object val;
1619
b7826503 1620 CHECK_STRING (str);
87b089ad 1621
3263d5a2 1622 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1623 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1624 charsets, table,
1625 STRING_MULTIBYTE (str));
4ed46869 1626 val = Qnil;
3263d5a2 1627 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1628 if (!NILP (AREF (charsets, i)))
3263d5a2 1629 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1630 return val;
1631}
2e344af3 1632
4ed46869 1633\f
3263d5a2 1634
ecca2aad
KH
1635/* Return a unified character code for C (>= 0x110000). VAL is a
1636 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1637 charset symbol. */
1638int
971de7fb 1639maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1640{
1641 struct charset *charset;
1642
1643 if (INTEGERP (val))
1644 return XINT (val);
1645 if (NILP (val))
1646 return c;
1647
1648 CHECK_CHARSET_GET_CHARSET (val, charset);
1649 load_charset (charset, 1);
1650 if (! inhibit_load_charset_map)
1651 {
1652 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1653 if (! NILP (val))
1654 c = XINT (val);
1655 }
1656 else
1657 {
1658 int code_index = c - CHARSET_CODE_OFFSET (charset);
1659 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1660
1661 if (unified > 0)
1662 c = unified;
1663 }
1664 return c;
1665}
1666
1667
3263d5a2
KH
1668/* Return a character correponding to the code-point CODE of
1669 CHARSET. */
1670
1671int
971de7fb 1672decode_char (struct charset *charset, unsigned int code)
4ed46869 1673{
3263d5a2
KH
1674 int c, char_index;
1675 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1676
3263d5a2
KH
1677 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1678 return -1;
4ed46869 1679
374c5cfd 1680 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1681 {
374c5cfd
KH
1682 Lisp_Object subset_info;
1683
1684 subset_info = CHARSET_SUBSET (charset);
1685 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1686 code -= XINT (AREF (subset_info, 3));
1687 if (code < XFASTINT (AREF (subset_info, 1))
1688 || code > XFASTINT (AREF (subset_info, 2)))
1689 c = -1;
1690 else
1691 c = DECODE_CHAR (charset, code);
2e344af3 1692 }
374c5cfd 1693 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1694 {
3263d5a2 1695 Lisp_Object parents;
4ed46869 1696
374c5cfd 1697 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1698 c = -1;
1699 for (; CONSP (parents); parents = XCDR (parents))
1700 {
1701 int id = XINT (XCAR (XCAR (parents)));
1702 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1703 unsigned this_code = code - code_offset;
4ed46869 1704
3263d5a2
KH
1705 charset = CHARSET_FROM_ID (id);
1706 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1707 break;
1708 }
1709 }
1710 else
ac4137cc 1711 {
3263d5a2 1712 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1713 if (char_index < 0)
1714 return -1;
4ed46869 1715
3263d5a2 1716 if (method == CHARSET_METHOD_MAP)
ac4137cc 1717 {
3263d5a2 1718 Lisp_Object decoder;
4ed46869 1719
3263d5a2
KH
1720 decoder = CHARSET_DECODER (charset);
1721 if (! VECTORP (decoder))
ecca2aad
KH
1722 {
1723 load_charset (charset, 1);
1724 decoder = CHARSET_DECODER (charset);
1725 }
1726 if (VECTORP (decoder))
1727 c = XINT (AREF (decoder, char_index));
1728 else
1729 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1730 }
ecca2aad 1731 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1732 {
3263d5a2 1733 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1734 if (CHARSET_UNIFIED_P (charset)
1735 && c > MAX_UNICODE_CHAR)
1736 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1737 }
1738 }
4ed46869 1739
3263d5a2 1740 return c;
90d7b74e
KH
1741}
1742
374c5cfd
KH
1743/* Variable used temporarily by the macro ENCODE_CHAR. */
1744Lisp_Object charset_work;
4ed46869 1745
3263d5a2 1746/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1747 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1748 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1749
3263d5a2 1750unsigned
971de7fb 1751encode_char (struct charset *charset, int c)
9d3d8cba 1752{
3263d5a2
KH
1753 unsigned code;
1754 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1755
3263d5a2 1756 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1757 {
6809ca75 1758 Lisp_Object deunifier;
ecca2aad 1759 int code_index = -1;
4ed46869 1760
3263d5a2
KH
1761 deunifier = CHARSET_DEUNIFIER (charset);
1762 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1763 {
ecca2aad 1764 load_charset (charset, 2);
3263d5a2 1765 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1766 }
ecca2aad
KH
1767 if (CHAR_TABLE_P (deunifier))
1768 {
1769 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1770
1771 if (INTEGERP (deunified))
1772 code_index = XINT (deunified);
1773 }
1774 else
1775 {
1776 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1777 }
1778 if (code_index >= 0)
1779 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1780 }
9d3d8cba 1781
374c5cfd
KH
1782 if (method == CHARSET_METHOD_SUBSET)
1783 {
1784 Lisp_Object subset_info;
1785 struct charset *this_charset;
1786
1787 subset_info = CHARSET_SUBSET (charset);
1788 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1789 code = ENCODE_CHAR (this_charset, c);
1790 if (code == CHARSET_INVALID_CODE (this_charset)
1791 || code < XFASTINT (AREF (subset_info, 1))
1792 || code > XFASTINT (AREF (subset_info, 2)))
1793 return CHARSET_INVALID_CODE (charset);
1794 code += XINT (AREF (subset_info, 3));
1795 return code;
1796 }
9d3d8cba 1797
374c5cfd 1798 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1799 {
3263d5a2 1800 Lisp_Object parents;
d2665018 1801
374c5cfd 1802 parents = CHARSET_SUPERSET (charset);
3263d5a2 1803 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1804 {
3263d5a2
KH
1805 int id = XINT (XCAR (XCAR (parents)));
1806 int code_offset = XINT (XCDR (XCAR (parents)));
1807 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1808
3263d5a2 1809 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1810 if (code != CHARSET_INVALID_CODE (this_charset))
1811 return code + code_offset;
beeedaad 1812 }
3263d5a2
KH
1813 return CHARSET_INVALID_CODE (charset);
1814 }
1bcc1567 1815
15c85a88
KH
1816 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1817 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1818 return CHARSET_INVALID_CODE (charset);
1bcc1567 1819
3263d5a2 1820 if (method == CHARSET_METHOD_MAP)
3f62427c 1821 {
3263d5a2 1822 Lisp_Object encoder;
beeedaad 1823 Lisp_Object val;
9b6a601f 1824
3263d5a2
KH
1825 encoder = CHARSET_ENCODER (charset);
1826 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1827 {
1828 load_charset (charset, 2);
1829 encoder = CHARSET_ENCODER (charset);
1830 }
1831 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1832 {
1833 val = CHAR_TABLE_REF (encoder, c);
1834 if (NILP (val))
1835 return CHARSET_INVALID_CODE (charset);
1836 code = XINT (val);
1837 if (! CHARSET_COMPACT_CODES_P (charset))
1838 code = INDEX_TO_CODE_POINT (charset, code);
1839 }
1840 else
1841 {
1842 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1843 code = INDEX_TO_CODE_POINT (charset, code);
1844 }
3263d5a2 1845 }
820ee249 1846 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1847 {
ecca2aad
KH
1848 int code_index = c - CHARSET_CODE_OFFSET (charset);
1849
1850 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1851 }
8ac5a9cc 1852
3263d5a2 1853 return code;
4ed46869
KH
1854}
1855
4ed46869 1856
3263d5a2
KH
1857DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1858 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1859Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1860
3263d5a2 1861CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1862
3263d5a2 1863Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1864and CODE-POINT to a character. Currently not supported and just ignored. */)
5842a27b 1865 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
4ed46869 1866{
3263d5a2
KH
1867 int c, id;
1868 unsigned code;
1869 struct charset *charsetp;
859f2b3c 1870
3263d5a2
KH
1871 CHECK_CHARSET_GET_ID (charset, id);
1872 if (CONSP (code_point))
1873 {
8f924df7
KH
1874 CHECK_NATNUM_CAR (code_point);
1875 CHECK_NATNUM_CDR (code_point);
69f8de5b 1876 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1877 }
4ed46869
KH
1878 else
1879 {
3263d5a2
KH
1880 CHECK_NATNUM (code_point);
1881 code = XINT (code_point);
4ed46869 1882 }
3263d5a2
KH
1883 charsetp = CHARSET_FROM_ID (id);
1884 c = DECODE_CHAR (charsetp, code);
1885 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1886}
1887
859f2b3c 1888
3263d5a2
KH
1889DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1890 doc: /* Encode the character CH into a code-point of CHARSET.
1891Return nil if CHARSET doesn't include CH.
17e7ef1b 1892
d0cf2d48 1893Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2 1894code-point in CCS. Currently not supported and just ignored. */)
5842a27b 1895 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
4ed46869 1896{
16fed1fc 1897 int id;
3263d5a2
KH
1898 unsigned code;
1899 struct charset *charsetp;
046b1f03 1900
3263d5a2
KH
1901 CHECK_CHARSET_GET_ID (charset, id);
1902 CHECK_NATNUM (ch);
3263d5a2 1903 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1904 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1905 if (code == CHARSET_INVALID_CODE (charsetp))
1906 return Qnil;
1907 if (code > 0x7FFFFFF)
1908 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1909 return make_number (code);
beeedaad
KH
1910}
1911
beeedaad 1912
b121a744
KH
1913DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1914 doc:
1915 /* Return a character of CHARSET whose position codes are CODEn.
1916
1917CODE1 through CODE4 are optional, but if you don't supply sufficient
1918position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1919is specified. */)
5842a27b 1920 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
beeedaad 1921{
3263d5a2
KH
1922 int id, dimension;
1923 struct charset *charsetp;
b121a744
KH
1924 unsigned code;
1925 int c;
87b089ad 1926
3263d5a2
KH
1927 CHECK_CHARSET_GET_ID (charset, id);
1928 charsetp = CHARSET_FROM_ID (id);
4ed46869 1929
b121a744
KH
1930 dimension = CHARSET_DIMENSION (charsetp);
1931 if (NILP (code1))
d47073ca
KH
1932 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1933 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1934 else
859f2b3c 1935 {
b121a744
KH
1936 CHECK_NATNUM (code1);
1937 if (XFASTINT (code1) >= 0x100)
1938 args_out_of_range (make_number (0xFF), code1);
1939 code = XFASTINT (code1);
859f2b3c 1940
b0a1e45e 1941 if (dimension > 1)
beeedaad 1942 {
b121a744 1943 code <<= 8;
b0a1e45e
KH
1944 if (NILP (code2))
1945 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1946 else
b121a744 1947 {
b0a1e45e
KH
1948 CHECK_NATNUM (code2);
1949 if (XFASTINT (code2) >= 0x100)
1950 args_out_of_range (make_number (0xFF), code2);
1951 code |= XFASTINT (code2);
b121a744 1952 }
99529c2c 1953
b0a1e45e 1954 if (dimension > 2)
b121a744
KH
1955 {
1956 code <<= 8;
b0a1e45e
KH
1957 if (NILP (code3))
1958 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1959 else
1960 {
b0a1e45e
KH
1961 CHECK_NATNUM (code3);
1962 if (XFASTINT (code3) >= 0x100)
1963 args_out_of_range (make_number (0xFF), code3);
1964 code |= XFASTINT (code3);
1965 }
1966
1967 if (dimension > 3)
1968 {
1969 code <<= 8;
1970 if (NILP (code4))
1971 code |= charsetp->code_space[0];
1972 else
1973 {
1974 CHECK_NATNUM (code4);
1975 if (XFASTINT (code4) >= 0x100)
1976 args_out_of_range (make_number (0xFF), code4);
1977 code |= XFASTINT (code4);
1978 }
b121a744
KH
1979 }
1980 }
beeedaad 1981 }
859f2b3c 1982 }
beeedaad 1983
b121a744
KH
1984 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1985 code &= 0x7F7F7F7F;
1986 c = DECODE_CHAR (charsetp, code);
1987 if (c < 0)
1988 error ("Invalid code(s)");
3263d5a2 1989 return make_number (c);
4ed46869
KH
1990}
1991
beeedaad 1992
3263d5a2
KH
1993/* Return the first charset in CHARSET_LIST that contains C.
1994 CHARSET_LIST is a list of charset IDs. If it is nil, use
1995 Vcharset_ordered_list. */
beeedaad 1996
3263d5a2 1997struct charset *
971de7fb 1998char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 1999{
8a1816bb
KH
2000 int maybe_null = 0;
2001
3263d5a2
KH
2002 if (NILP (charset_list))
2003 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2004 else
2005 maybe_null = 1;
beeedaad 2006
6c652beb 2007 while (CONSP (charset_list))
2e344af3 2008 {
3263d5a2
KH
2009 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2010 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2011
3263d5a2 2012 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2013 {
3263d5a2
KH
2014 if (code_return)
2015 *code_return = code;
2016 return charset;
3f62427c 2017 }
3263d5a2 2018 charset_list = XCDR (charset_list);
c0be27fd
KH
2019 if (! maybe_null
2020 && c <= MAX_UNICODE_CHAR
2021 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 2022 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2023 }
8a1816bb
KH
2024 return (maybe_null ? NULL
2025 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2026 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2027}
2028
2e344af3 2029
3263d5a2 2030DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2031 doc:
d0cf2d48 2032 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2033The charset is decided by the current priority order of charsets.
2034A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2035CH in the charset. */)
5842a27b 2036 (Lisp_Object ch)
4ed46869 2037{
3263d5a2
KH
2038 struct charset *charset;
2039 int c, dimension;
2040 unsigned code;
4ed46869
KH
2041 Lisp_Object val;
2042
3263d5a2
KH
2043 CHECK_CHARACTER (ch);
2044 c = XFASTINT (ch);
2045 charset = CHAR_CHARSET (c);
2046 if (! charset)
3cc67a4d 2047 abort ();
3263d5a2
KH
2048 code = ENCODE_CHAR (charset, c);
2049 if (code == CHARSET_INVALID_CODE (charset))
2050 abort ();
2051 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2052 for (val = Qnil; dimension > 0; dimension--)
2053 {
2054 val = Fcons (make_number (code & 0xFF), val);
2055 code >>= 8;
2056 }
3263d5a2 2057 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2058}
2059
740f080d 2060
4cb75c4b
KH
2061DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2062 doc: /* Return the charset of highest priority that contains CH.
2063If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2064from which to find the charset. It may also be a coding system. In
2065that case, find the charset from what supported by that coding system. */)
5842a27b 2066 (Lisp_Object ch, Lisp_Object restriction)
4ed46869 2067{
3263d5a2 2068 struct charset *charset;
4ed46869 2069
3263d5a2 2070 CHECK_CHARACTER (ch);
4cb75c4b
KH
2071 if (NILP (restriction))
2072 charset = CHAR_CHARSET (XINT (ch));
2073 else
2074 {
4cb75c4b
KH
2075 if (CONSP (restriction))
2076 {
7a84eee5
KH
2077 int c = XFASTINT (ch);
2078
2079 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2080 {
7a84eee5 2081 struct charset *charset;
4cb75c4b 2082
7a84eee5
KH
2083 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2084 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2085 return XCAR (restriction);
4cb75c4b 2086 }
7a84eee5 2087 return Qnil;
4cb75c4b 2088 }
7a84eee5
KH
2089 restriction = coding_system_charset_list (restriction);
2090 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2091 if (! charset)
2092 return Qnil;
2093 }
3263d5a2 2094 return (CHARSET_NAME (charset));
4ed46869
KH
2095}
2096
17e7ef1b 2097
3263d5a2
KH
2098DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2099 doc: /*
2100Return charset of a character in the current buffer at position POS.
2101If POS is nil, it defauls to the current point.
2102If POS is out of range, the value is nil. */)
5842a27b 2103 (Lisp_Object pos)
2e344af3 2104{
3263d5a2
KH
2105 Lisp_Object ch;
2106 struct charset *charset;
046b1f03 2107
3263d5a2
KH
2108 ch = Fchar_after (pos);
2109 if (! INTEGERP (ch))
2110 return ch;
2111 charset = CHAR_CHARSET (XINT (ch));
2112 return (CHARSET_NAME (charset));
6ae1f27e 2113}
9036eb45 2114
87b089ad 2115
3263d5a2
KH
2116DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2117 doc: /*
2118Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2119
2120ISO 2022's designation sequence (escape sequence) distinguishes charsets
2121by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2122whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2123See the documentation of the function `charset-info' for the meanings of
2124DIMENSION, CHARS, and FINAL-CHAR. */)
5842a27b 2125 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
6ae1f27e 2126{
3263d5a2 2127 int id;
82215ce9 2128 int chars_flag;
a8a35e61 2129
3263d5a2 2130 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2131 chars_flag = XFASTINT (chars) == 96;
2132 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2133 XFASTINT (final_char));
2134 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2135}
2136
87b089ad 2137
3263d5a2
KH
2138DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2139 0, 0, 0,
2140 doc: /*
ecca2aad
KH
2141Internal use only.
2142Clear temporary charset mapping tables.
2143It should be called only from temacs invoked for dumping. */)
5842a27b 2144 (void)
87b089ad 2145{
ecca2aad 2146 if (temp_charset_work)
87b089ad 2147 {
ecca2aad
KH
2148 free (temp_charset_work);
2149 temp_charset_work = NULL;
2e344af3 2150 }
2e344af3 2151
ecca2aad
KH
2152 if (CHAR_TABLE_P (Vchar_unify_table))
2153 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2154
3263d5a2 2155 return Qnil;
740f080d
KH
2156}
2157
8ddf5e57
DL
2158DEFUN ("charset-priority-list", Fcharset_priority_list,
2159 Scharset_priority_list, 0, 1, 0,
2160 doc: /* Return the list of charsets ordered by priority.
2161HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 2162 (Lisp_Object highestp)
2e344af3 2163{
8ddf5e57 2164 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2165
8ddf5e57 2166 if (!NILP (highestp))
16fed1fc 2167 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2168
8ddf5e57 2169 while (!NILP (list))
2e344af3 2170 {
16fed1fc 2171 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2172 list = XCDR (list);
2e344af3 2173 }
8ddf5e57 2174 return Fnreverse (val);
2e344af3
KH
2175}
2176
8ddf5e57
DL
2177DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2178 1, MANY, 0,
2179 doc: /* Assign higher priority to the charsets given as arguments.
2180usage: (set-charset-priority &rest charsets) */)
5842a27b 2181 (int nargs, Lisp_Object *args)
4ed46869 2182{
af7c60ca 2183 Lisp_Object new_head, old_list, arglist[2];
321c819c 2184 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2185 int i, id;
4ed46869 2186
8ddf5e57 2187 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2188 new_head = Qnil;
8ddf5e57 2189 for (i = 0; i < nargs; i++)
4ed46869 2190 {
8ddf5e57 2191 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2192 if (! NILP (Fmemq (make_number (id), old_list)))
2193 {
2194 old_list = Fdelq (make_number (id), old_list);
2195 new_head = Fcons (make_number (id), new_head);
2196 }
5729c92f 2197 }
8ddf5e57 2198 arglist[0] = Fnreverse (new_head);
6a9c90ec 2199 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2200 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2201 charset_ordered_list_tick++;
5729c92f 2202
6809ca75 2203 charset_unibyte = -1;
321c819c 2204 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2205 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2206 {
e77415b0 2207 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2208 list_2022 = Fcons (XCAR (old_list), list_2022);
2209 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2210 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2211 if (charset_unibyte < 0)
2212 {
2213 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2214
2215 if (CHARSET_DIMENSION (charset) == 1
2216 && CHARSET_ASCII_COMPATIBLE_P (charset)
2217 && CHARSET_MAX_CHAR (charset) >= 0x80)
2218 charset_unibyte = CHARSET_ID (charset);
2219 }
4ed46869 2220 }
321c819c
KH
2221 Viso_2022_charset_list = Fnreverse (list_2022);
2222 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2223 if (charset_unibyte < 0)
2224 charset_unibyte = charset_iso_8859_1;
4ed46869 2225
8ddf5e57 2226 return Qnil;
4ed46869
KH
2227}
2228
d5b33309
KH
2229DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2230 0, 1, 0,
2231 doc: /* Internal use only.
2232Return charset identification number of CHARSET. */)
5842a27b 2233 (Lisp_Object charset)
4ed46869 2234{
d5b33309 2235 int id;
4ed46869 2236
d5b33309
KH
2237 CHECK_CHARSET_GET_ID (charset, id);
2238 return make_number (id);
4ed46869
KH
2239}
2240
7a84eee5
KH
2241struct charset_sort_data
2242{
2243 Lisp_Object charset;
2244 int id;
2245 int priority;
2246};
2247
2248static int
2249charset_compare (const void *d1, const void *d2)
2250{
2251 const struct charset_sort_data *data1 = d1, *data2 = d2;
2252 return (data1->priority - data2->priority);
2253}
2254
2255DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2256 doc: /* Sort charset list CHARSETS by a priority of each charset.
2257Return the sorted list. CHARSETS is modified by side effects.
2258See also `charset-priority-list' and `set-charset-priority'. */)
2259 (Lisp_Object charsets)
2260{
2261 Lisp_Object len = Flength (charsets);
2262 int n = XFASTINT (len), i, j, done;
2263 Lisp_Object tail, elt, attrs;
2264 struct charset_sort_data *sort_data;
2265 int id, min_id, max_id;
2266 USE_SAFE_ALLOCA;
2267
2268 if (n == 0)
2269 return Qnil;
2270 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2271 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2272 {
2273 elt = XCAR (tail);
2274 CHECK_CHARSET_GET_ATTR (elt, attrs);
2275 sort_data[i].charset = elt;
2276 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2277 if (i == 0)
2278 min_id = max_id = id;
2279 else if (id < min_id)
2280 min_id = id;
2281 else if (id > max_id)
2282 max_id = id;
2283 }
2284 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2285 done < n && CONSP (tail); tail = XCDR (tail), i++)
2286 {
2287 elt = XCAR (tail);
2288 id = XFASTINT (elt);
2289 if (id >= min_id && id <= max_id)
2290 for (j = 0; j < n; j++)
2291 if (sort_data[j].id == id)
2292 {
2293 sort_data[j].priority = i;
2294 done++;
2295 }
2296 }
2297 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2298 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2299 XSETCAR (tail, sort_data[i].charset);
2300 SAFE_FREE ();
2301 return charsets;
2302}
2303
4ed46869 2304\f
3263d5a2 2305void
971de7fb 2306init_charset (void)
4ed46869 2307{
c8f94403
GM
2308 Lisp_Object tempdir;
2309 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
6809ca75 2310 if (access ((char *) SDATA (tempdir), 0) < 0)
c8f94403
GM
2311 {
2312 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2313Emacs will not function correctly without the character map files.\n\
2314Please check your installation!\n",
2315 tempdir);
2316 /* TODO should this be a fatal error? (Bug#909) */
2317 }
2318
2319 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2320}
2321
4ed46869 2322
dfcf069d 2323void
971de7fb 2324init_charset_once (void)
4ed46869
KH
2325{
2326 int i, j, k;
2327
3263d5a2
KH
2328 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2329 for (j = 0; j < ISO_MAX_CHARS; j++)
2330 for (k = 0; k < ISO_MAX_FINAL; k++)
2331 iso_charset_table[i][j][k] = -1;
4ed46869 2332
60383934 2333 for (i = 0; i < 256; i++)
3263d5a2 2334 emacs_mule_charset[i] = NULL;
4ed46869 2335
7c7dceee
KH
2336 charset_jisx0201_roman = -1;
2337 charset_jisx0208_1978 = -1;
2338 charset_jisx0208 = -1;
d32320c4 2339 charset_ksc5601 = -1;
4ed46869
KH
2340}
2341
2342#ifdef emacs
2343
dfcf069d 2344void
971de7fb 2345syms_of_charset (void)
4ed46869 2346{
3263d5a2
KH
2347 DEFSYM (Qcharsetp, "charsetp");
2348
2349 DEFSYM (Qascii, "ascii");
2350 DEFSYM (Qunicode, "unicode");
6c652beb 2351 DEFSYM (Qemacs, "emacs");
2fe1edd1 2352 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2353 DEFSYM (Qiso_8859_1, "iso-8859-1");
2354
2355 DEFSYM (Qgl, "gl");
2356 DEFSYM (Qgr, "gr");
2357
3263d5a2
KH
2358 staticpro (&Vcharset_ordered_list);
2359 Vcharset_ordered_list = Qnil;
2360
2361 staticpro (&Viso_2022_charset_list);
2362 Viso_2022_charset_list = Qnil;
2363
2364 staticpro (&Vemacs_mule_charset_list);
2365 Vemacs_mule_charset_list = Qnil;
2366
3943ed76
KH
2367 /* Don't staticpro them here. It's done in syms_of_fns. */
2368 QCtest = intern (":test");
2369 Qeq = intern ("eq");
2370
3263d5a2 2371 staticpro (&Vcharset_hash_table);
8f924df7
KH
2372 {
2373 Lisp_Object args[2];
2374 args[0] = QCtest;
2375 args[1] = Qeq;
2376 Vcharset_hash_table = Fmake_hash_table (2, args);
2377 }
3263d5a2
KH
2378
2379 charset_table_size = 128;
2380 charset_table = ((struct charset *)
2381 xmalloc (sizeof (struct charset) * charset_table_size));
2382 charset_table_used = 0;
2383
3263d5a2
KH
2384 defsubr (&Scharsetp);
2385 defsubr (&Smap_charset_chars);
2386 defsubr (&Sdefine_charset_internal);
2387 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2388 defsubr (&Scharset_plist);
2389 defsubr (&Sset_charset_plist);
2390 defsubr (&Sunify_charset);
3fac5a51 2391 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2392 defsubr (&Sdeclare_equiv_charset);
2393 defsubr (&Sfind_charset_region);
2394 defsubr (&Sfind_charset_string);
3263d5a2
KH
2395 defsubr (&Sdecode_char);
2396 defsubr (&Sencode_char);
4ed46869 2397 defsubr (&Ssplit_char);
3263d5a2 2398 defsubr (&Smake_char);
4ed46869 2399 defsubr (&Schar_charset);
90d7b74e 2400 defsubr (&Scharset_after);
4ed46869 2401 defsubr (&Siso_charset);
3263d5a2 2402 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2403 defsubr (&Scharset_priority_list);
2404 defsubr (&Sset_charset_priority);
d5b33309 2405 defsubr (&Scharset_id_internal);
7a84eee5 2406 defsubr (&Ssort_charsets);
3263d5a2 2407
4beef065 2408 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2409 doc: /* *List of directories to search for charset map files. */);
4beef065 2410 Vcharset_map_path = Qnil;
4ed46869 2411
ecca2aad
KH
2412 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2413 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2414 inhibit_load_charset_map = 0;
2415
4ed46869 2416 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2417 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2418 Vcharset_list = Qnil;
2419
6a9c90ec
KH
2420 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2421 doc: /* ISO639 language mnemonic symbol for the current language environment.
2422If the current language environment is for multiple languages (e.g. "Latin-1"),
2423the value may be a list of mnemonics. */);
2424 Vcurrent_iso639_language = Qnil;
2425
2fe1edd1
KH
2426 charset_ascii
2427 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2428 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2429 charset_iso_8859_1
2430 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2431 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2432 charset_unicode
73fbf2d9 2433 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2434 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2435 charset_emacs
2436 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2437 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2438 charset_eight_bit
2439 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2440 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2441 MAX_5_BYTE_CHAR + 1);
6809ca75 2442 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2443}
2444
2445#endif /* emacs */
cefd8c4f
KH
2446
2447/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2448 (do not change this comment) */