Unify mouse-highlight code for all GUI and TTY sessions.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
114f9c96 3 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 5 2005, 2006, 2007, 2008, 2009, 2010
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
d7306fe6 34#include <setjmp.h>
4ed46869 35#include "lisp.h"
3263d5a2 36#include "character.h"
4ed46869
KH
37#include "charset.h"
38#include "coding.h"
fc6b09bf 39#include "disptab.h"
3263d5a2 40#include "buffer.h"
4ed46869 41
04c2f2c5 42/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 43
3263d5a2 44 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 45 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
4ed46869 49
3263d5a2
KH
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
04c2f2c5 53 charset_table as a struct charset.
4ed46869 54
3263d5a2 55*/
4ed46869 56
3263d5a2
KH
57/* List of all charsets. This variable is used only from Emacs
58 Lisp. */
4ed46869 59Lisp_Object Vcharset_list;
4ed46869 60
3263d5a2
KH
61/* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63Lisp_Object Vcharset_hash_table;
4ed46869 64
3263d5a2
KH
65/* Table of struct charset. */
66struct charset *charset_table;
4ed46869 67
3263d5a2 68static int charset_table_size;
5af5dd92 69static int charset_table_used;
4ed46869 70
3263d5a2 71Lisp_Object Qcharsetp;
4ed46869 72
3263d5a2
KH
73/* Special charset symbols. */
74Lisp_Object Qascii;
2fe1edd1 75Lisp_Object Qeight_bit;
3263d5a2
KH
76Lisp_Object Qiso_8859_1;
77Lisp_Object Qunicode;
6c652beb 78Lisp_Object Qemacs;
4ed46869 79
3263d5a2
KH
80/* The corresponding charsets. */
81int charset_ascii;
2fe1edd1 82int charset_eight_bit;
3263d5a2
KH
83int charset_iso_8859_1;
84int charset_unicode;
6c652beb 85int charset_emacs;
b0e3cf2b 86
7c7dceee
KH
87/* The other special charsets. */
88int charset_jisx0201_roman;
89int charset_jisx0208_1978;
90int charset_jisx0208;
d32320c4 91int charset_ksc5601;
c1a08b4c 92
3263d5a2
KH
93/* Value of charset attribute `charset-iso-plane'. */
94Lisp_Object Qgl, Qgr;
c1a08b4c 95
d1a04588
KH
96/* Charset of unibyte characters. */
97int charset_unibyte;
4ed46869 98
3263d5a2
KH
99/* List of charsets ordered by the priority. */
100Lisp_Object Vcharset_ordered_list;
4ed46869 101
6a9c90ec
KH
102/* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104Lisp_Object Vcharset_non_preferred_head;
105
dbbb237d 106/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 107 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
108 -1. */
109unsigned short charset_ordered_list_tick;
4ed46869 110
3263d5a2
KH
111/* List of iso-2022 charsets. */
112Lisp_Object Viso_2022_charset_list;
35e623fb 113
3263d5a2
KH
114/* List of emacs-mule charsets. */
115Lisp_Object Vemacs_mule_charset_list;
116
117struct charset *emacs_mule_charset[256];
4ed46869
KH
118
119/* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
121int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
4beef065 123Lisp_Object Vcharset_map_path;
3263d5a2 124
ecca2aad
KH
125/* If nonzero, don't load charset maps. */
126int inhibit_load_charset_map;
3263d5a2 127
6a9c90ec
KH
128Lisp_Object Vcurrent_iso639_language;
129
69f8de5b
KH
130#define CODE_POINT_TO_INDEX(charset, code) \
131 ((charset)->code_linear_p \
132 ? (code) - (charset)->min_code \
133 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
134 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
135 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
136 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
137 ? (((((code) >> 24) - (charset)->code_space[12]) \
138 * (charset)->code_space[11]) \
139 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
140 * (charset)->code_space[7]) \
141 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
142 * (charset)->code_space[3]) \
820ee249
KH
143 + (((code) & 0xFF) - (charset)->code_space[0]) \
144 - ((charset)->char_index_offset)) \
3263d5a2
KH
145 : -1)
146
147
148/* Convert the character index IDX to code-point CODE for CHARSET.
149 It is assumed that IDX is in a valid range. */
150
820ee249
KH
151#define INDEX_TO_CODE_POINT(charset, idx) \
152 ((charset)->code_linear_p \
153 ? (idx) + (charset)->min_code \
154 : (idx += (charset)->char_index_offset, \
155 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
156 | (((charset)->code_space[4] \
157 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
158 << 8) \
159 | (((charset)->code_space[8] \
160 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
161 << 16) \
162 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
163 << 24))))
4cf9710d 164
ecca2aad
KH
165/* Structure to hold mapping tables for a charset. Used by temacs
166 invoked for dumping. */
8a73a704 167
ecca2aad
KH
168static struct
169{
170 /* The current charset for which the following tables are setup. */
171 struct charset *current;
172
173 /* 1 iff the following table is used for encoder. */
174 short for_encoder;
175
176 /* When the following table is used for encoding, mininum and
177 maxinum character of the current charset. */
178 int min_char, max_char;
179
180 /* A Unicode character correspoinding to the code indice 0 (i.e. the
181 minimum code-point) of the current charset, or -1 if the code
182 indice 0 is not a Unicode character. This is checked when
183 table.encoder[CHAR] is zero. */
184 int zero_index_char;
185
186 union {
187 /* Table mapping code-indices (not code-points) of the current
188 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
189 doesn't belong to the current charset. */
190 int decoder[0x10000];
191 /* Table mapping Unicode characters to code-indices of the current
192 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
193 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
194 (0x20000..0x2FFFF). Note that there is no charset map that
195 uses both SMP and SIP. */
196 unsigned short encoder[0x20000];
197 } table;
198} *temp_charset_work;
199
200#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
201 do { \
202 if ((CODE) == 0) \
203 temp_charset_work->zero_index_char = (C); \
204 else if ((C) < 0x20000) \
205 temp_charset_work->table.encoder[(C)] = (CODE); \
206 else \
207 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
208 } while (0)
209
210#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
211 ((C) == temp_charset_work->zero_index_char ? 0 \
212 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
213 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
214 : temp_charset_work->table.encoder[(C) - 0x10000] \
215 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
216
217#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
218 (temp_charset_work->table.decoder[(CODE)] = (C))
219
220#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
221 (temp_charset_work->table.decoder[(CODE)])
046b1f03 222\f
93bcb785 223
e9ce014c
KH
224/* Set to 1 to warn that a charset map is loaded and thus a buffer
225 text and a string data may be relocated. */
3263d5a2 226int charset_map_loaded;
4ed46869 227
e9ce014c 228struct charset_map_entries
4ed46869 229{
e9ce014c
KH
230 struct {
231 unsigned from, to;
232 int c;
233 } entry[0x10000];
234 struct charset_map_entries *next;
235};
236
ecca2aad
KH
237/* Load the mapping information of CHARSET from ENTRIES for
238 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
239 encoding (CONTROL_FLAG == 2).
240
241 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
242 and CHARSET->fast_map.
243
244 If CONTROL_FLAG is 1, setup the following tables according to
245 CHARSET->method and inhibit_load_charset_map.
4cf9710d 246
ecca2aad
KH
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
250 ----------------------+--------------------+---------------------------
251 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 252
ecca2aad 253 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 254
ecca2aad
KH
255 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
258 ----------------------+--------------------+--------------------------
259 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
260*/
4ed46869 261
3263d5a2 262static void
971de7fb 263load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 264{
3263d5a2 265 Lisp_Object vec, table;
3263d5a2
KH
266 unsigned max_code = CHARSET_MAX_CODE (charset);
267 int ascii_compatible_p = charset->ascii_compatible_p;
268 int min_char, max_char, nonascii_min_char;
3263d5a2 269 int i;
3263d5a2 270 unsigned char *fast_map = charset->fast_map;
99529c2c 271
e9ce014c
KH
272 if (n_entries <= 0)
273 return;
274
ecca2aad 275 if (control_flag)
8ac5a9cc 276 {
ecca2aad
KH
277 if (! inhibit_load_charset_map)
278 {
279 if (control_flag == 1)
280 {
281 if (charset->method == CHARSET_METHOD_MAP)
282 {
283 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 284
ecca2aad
KH
285 vec = CHARSET_DECODER (charset)
286 = Fmake_vector (make_number (n), make_number (-1));
287 }
288 else
289 {
290 char_table_set_range (Vchar_unify_table,
291 charset->min_char, charset->max_char,
292 Qnil);
293 }
294 }
295 else
296 {
297 table = Fmake_char_table (Qnil, Qnil);
298 if (charset->method == CHARSET_METHOD_MAP)
299 CHARSET_ENCODER (charset) = table;
300 else
301 CHARSET_DEUNIFIER (charset) = table;
302 }
303 }
304 else
305 {
306 if (! temp_charset_work)
307 temp_charset_work = malloc (sizeof (*temp_charset_work));
308 if (control_flag == 1)
309 {
310 memset (temp_charset_work->table.decoder, -1,
311 sizeof (int) * 0x10000);
ecca2aad
KH
312 }
313 else
314 {
315 memset (temp_charset_work->table.encoder, 0,
316 sizeof (unsigned short) * 0x20000);
317 temp_charset_work->zero_index_char = -1;
318 }
319 temp_charset_work->current = charset;
320 temp_charset_work->for_encoder = (control_flag == 2);
321 control_flag += 2;
322 }
3263d5a2 323 charset_map_loaded = 1;
2e344af3 324 }
6662e69b 325
e9ce014c 326 min_char = max_char = entries->entry[0].c;
3263d5a2 327 nonascii_min_char = MAX_CHAR;
e9ce014c 328 for (i = 0; i < n_entries; i++)
2e344af3 329 {
e9ce014c 330 unsigned from, to;
3b4f4446
KH
331 int from_index, to_index;
332 int from_c, to_c;
e9ce014c 333 int idx = i % 0x10000;
3263d5a2 334
e9ce014c
KH
335 if (i > 0 && idx == 0)
336 entries = entries->next;
337 from = entries->entry[idx].from;
338 to = entries->entry[idx].to;
3b4f4446
KH
339 from_c = entries->entry[idx].c;
340 from_index = CODE_POINT_TO_INDEX (charset, from);
341 if (from == to)
6662e69b 342 {
3b4f4446
KH
343 to_index = from_index;
344 to_c = from_c;
6662e69b 345 }
3b4f4446 346 else
6662e69b 347 {
3b4f4446
KH
348 to_index = CODE_POINT_TO_INDEX (charset, to);
349 to_c = from_c + (to_index - from_index);
6662e69b 350 }
3b4f4446
KH
351 if (from_index < 0 || to_index < 0)
352 continue;
3263d5a2 353
ecca2aad
KH
354 if (to_c > max_char)
355 max_char = to_c;
356 else if (from_c < min_char)
357 min_char = from_c;
6662e69b 358
ecca2aad
KH
359 if (control_flag == 1)
360 {
361 if (charset->method == CHARSET_METHOD_MAP)
362 for (; from_index <= to_index; from_index++, from_c++)
363 ASET (vec, from_index, make_number (from_c));
364 else
365 for (; from_index <= to_index; from_index++, from_c++)
366 CHAR_TABLE_SET (Vchar_unify_table,
367 CHARSET_CODE_OFFSET (charset) + from_index,
368 make_number (from_c));
369 }
370 else if (control_flag == 2)
371 {
372 if (charset->method == CHARSET_METHOD_MAP
373 && CHARSET_COMPACT_CODES_P (charset))
374 for (; from_index <= to_index; from_index++, from_c++)
375 {
376 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
377
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (code));
380 }
381 else
382 for (; from_index <= to_index; from_index++, from_c++)
383 {
384 if (NILP (CHAR_TABLE_REF (table, from_c)))
385 CHAR_TABLE_SET (table, from_c, make_number (from_index));
386 }
387 }
388 else if (control_flag == 3)
389 for (; from_index <= to_index; from_index++, from_c++)
390 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
391 else if (control_flag == 4)
392 for (; from_index <= to_index; from_index++, from_c++)
393 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
394 else /* control_flag == 0 */
395 {
3b4f4446
KH
396 if (ascii_compatible_p)
397 {
398 if (! ASCII_BYTE_P (from_c))
399 {
400 if (from_c < nonascii_min_char)
401 nonascii_min_char = from_c;
402 }
403 else if (! ASCII_BYTE_P (to_c))
404 {
405 nonascii_min_char = 0x80;
406 }
407 }
177c0ea7 408
ecca2aad
KH
409 for (; from_c <= to_c; from_c++)
410 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 411 }
8ac5a9cc 412 }
3263d5a2 413
ecca2aad 414 if (control_flag == 0)
4ed46869 415 {
3263d5a2
KH
416 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
417 ? nonascii_min_char : min_char);
418 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 419 }
ecca2aad
KH
420 else if (control_flag == 4)
421 {
422 temp_charset_work->min_char = min_char;
423 temp_charset_work->max_char = max_char;
424 }
4ed46869
KH
425}
426
12bcae05 427
3263d5a2 428/* Read a hexadecimal number (preceded by "0x") from the file FP while
3b59c351 429 paying attention to comment character '#'. */
12bcae05 430
3263d5a2 431static INLINE unsigned
971de7fb 432read_hex (FILE *fp, int *eof)
12bcae05 433{
3263d5a2
KH
434 int c;
435 unsigned n;
12bcae05 436
3263d5a2
KH
437 while ((c = getc (fp)) != EOF)
438 {
69f8de5b 439 if (c == '#')
3263d5a2
KH
440 {
441 while ((c = getc (fp)) != EOF && c != '\n');
442 }
443 else if (c == '0')
444 {
445 if ((c = getc (fp)) == EOF || c == 'x')
446 break;
447 }
8f924df7 448 }
3263d5a2
KH
449 if (c == EOF)
450 {
451 *eof = 1;
452 return 0;
453 }
454 *eof = 0;
455 n = 0;
456 if (c == 'x')
457 while ((c = getc (fp)) != EOF && isxdigit (c))
458 n = ((n << 4)
459 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
460 else
461 while ((c = getc (fp)) != EOF && isdigit (c))
462 n = (n * 10) + c - '0';
e9ce014c
KH
463 if (c != EOF)
464 ungetc (c, fp);
3263d5a2
KH
465 return n;
466}
12bcae05 467
3263d5a2 468/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
469 Each line of MAPFILE has this form
470 0xAAAA 0xCCCC
471 where 0xAAAA is a code-point and 0xCCCC is the corresponding
472 character code, or this form
473 0xAAAA-0xBBBB 0xCCCC
474 where 0xAAAA and 0xBBBB are code-points specifying a range, and
475 0xCCCC is the first character code of the range.
4ed46869 476
3263d5a2
KH
477 The returned vector has this form:
478 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 479 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
480 range.
481
18a10a21
JB
482 Note that this function uses `openp' to open MAPFILE but ignores
483 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 484
e9ce014c 485static void
971de7fb 486load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
4ed46869 487{
e9ce014c
KH
488 unsigned min_code = CHARSET_MIN_CODE (charset);
489 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
490 int fd;
491 FILE *fp;
3263d5a2
KH
492 int eof;
493 Lisp_Object suffixes;
e9ce014c 494 struct charset_map_entries *head, *entries;
a2f3eb19
CY
495 int n_entries, count;
496 USE_SAFE_ALLOCA;
4ed46869 497
3263d5a2
KH
498 suffixes = Fcons (build_string (".map"),
499 Fcons (build_string (".TXT"), Qnil));
4ed46869 500
a2f3eb19 501 count = SPECPDL_INDEX ();
dde2559c 502 specbind (Qfile_name_handler_alist, Qnil);
4beef065 503 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 504 unbind_to (count, Qnil);
3263d5a2
KH
505 if (fd < 0
506 || ! (fp = fdopen (fd, "r")))
8458d4c1 507 error ("Failure in loading charset map: %S", SDATA (mapfile));
4ed46869 508
a2f3eb19
CY
509 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
510 large (larger than MAX_ALLOCA). */
511 SAFE_ALLOCA (head, struct charset_map_entries *,
512 sizeof (struct charset_map_entries));
513 entries = head;
72af86bd 514 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 515
e9ce014c 516 n_entries = 0;
3263d5a2
KH
517 eof = 0;
518 while (1)
519 {
e9ce014c
KH
520 unsigned from, to;
521 int c;
522 int idx;
4ed46869 523
e9ce014c 524 from = read_hex (fp, &eof);
3263d5a2
KH
525 if (eof)
526 break;
e9ce014c
KH
527 if (getc (fp) == '-')
528 to = read_hex (fp, &eof);
529 else
530 to = from;
531 c = (int) read_hex (fp, &eof);
ac4137cc 532
e9ce014c
KH
533 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
534 continue;
ac4137cc 535
e9ce014c 536 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 537 {
a2f3eb19
CY
538 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
539 sizeof (struct charset_map_entries));
e9ce014c 540 entries = entries->next;
72af86bd 541 memset (entries, 0, sizeof (struct charset_map_entries));
3263d5a2 542 }
e9ce014c
KH
543 idx = n_entries % 0x10000;
544 entries->entry[idx].from = from;
545 entries->entry[idx].to = to;
546 entries->entry[idx].c = c;
547 n_entries++;
3263d5a2
KH
548 }
549 fclose (fp);
177c0ea7 550
e9ce014c 551 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 552 SAFE_FREE ();
4ed46869
KH
553}
554
e9ce014c 555static void
971de7fb 556load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 557{
e9ce014c
KH
558 unsigned min_code = CHARSET_MIN_CODE (charset);
559 unsigned max_code = CHARSET_MAX_CODE (charset);
560 struct charset_map_entries *head, *entries;
561 int n_entries;
562 int len = ASIZE (vec);
563 int i;
a2f3eb19 564 USE_SAFE_ALLOCA;
23d2a7f1 565
e9ce014c 566 if (len % 2 == 1)
3263d5a2 567 {
e9ce014c
KH
568 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
569 return;
3263d5a2 570 }
35e623fb 571
a2f3eb19
CY
572 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
573 large (larger than MAX_ALLOCA). */
574 SAFE_ALLOCA (head, struct charset_map_entries *,
575 sizeof (struct charset_map_entries));
576 entries = head;
72af86bd 577 memset (entries, 0, sizeof (struct charset_map_entries));
a2f3eb19 578
e9ce014c
KH
579 n_entries = 0;
580 for (i = 0; i < len; i += 2)
35e623fb 581 {
e9ce014c
KH
582 Lisp_Object val, val2;
583 unsigned from, to;
584 int c;
585 int idx;
d2665018 586
e9ce014c
KH
587 val = AREF (vec, i);
588 if (CONSP (val))
bbf12bb3 589 {
e9ce014c
KH
590 val2 = XCDR (val);
591 val = XCAR (val);
592 CHECK_NATNUM (val);
593 CHECK_NATNUM (val2);
594 from = XFASTINT (val);
595 to = XFASTINT (val2);
bbf12bb3 596 }
e9ce014c 597 else
bbf12bb3 598 {
e9ce014c
KH
599 CHECK_NATNUM (val);
600 from = to = XFASTINT (val);
bbf12bb3 601 }
e9ce014c
KH
602 val = AREF (vec, i + 1);
603 CHECK_NATNUM (val);
604 c = XFASTINT (val);
76d7b829 605
e9ce014c
KH
606 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
607 continue;
76d7b829 608
dcc694d7 609 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 610 {
a2f3eb19
CY
611 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
612 sizeof (struct charset_map_entries));
e9ce014c 613 entries = entries->next;
72af86bd 614 memset (entries, 0, sizeof (struct charset_map_entries));
e9ce014c
KH
615 }
616 idx = n_entries % 0x10000;
617 entries->entry[idx].from = from;
618 entries->entry[idx].to = to;
619 entries->entry[idx].c = c;
620 n_entries++;
621 }
76d7b829 622
e9ce014c 623 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 624 SAFE_FREE ();
ac4137cc
KH
625}
626
ecca2aad
KH
627
628/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
629 map it is (see the comment of load_charset_map for the detail). */
630
3263d5a2 631static void
971de7fb 632load_charset (struct charset *charset, int control_flag)
76d7b829 633{
ecca2aad 634 Lisp_Object map;
76d7b829 635
ecca2aad
KH
636 if (inhibit_load_charset_map
637 && temp_charset_work
638 && charset == temp_charset_work->current
56f00ed2 639 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
640 return;
641
642 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
643 map = CHARSET_MAP (charset);
644 else if (CHARSET_UNIFIED_P (charset))
645 map = CHARSET_UNIFY_MAP (charset);
646 if (STRINGP (map))
647 load_charset_map_from_file (charset, map, control_flag);
648 else
649 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 650}
76d7b829 651
3263d5a2
KH
652
653DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
654 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
5842a27b 655 (Lisp_Object object)
23d2a7f1 656{
3263d5a2 657 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
658}
659
4ed46869 660
f57e2426
J
661void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
662 Lisp_Object function, Lisp_Object arg,
663 unsigned from, unsigned to);
ecca2aad
KH
664
665void
971de7fb 666map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
ecca2aad
KH
667{
668 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
669 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
670 Lisp_Object range;
671 int c, stop;
672 struct gcpro gcpro1;
673
674 range = Fcons (Qnil, Qnil);
675 GCPRO1 (range);
676
677 c = temp_charset_work->min_char;
678 stop = (temp_charset_work->max_char < 0x20000
679 ? temp_charset_work->max_char : 0xFFFF);
680
681 while (1)
682 {
683 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
684
685 if (index >= from_idx && index <= to_idx)
686 {
687 if (NILP (XCAR (range)))
688 XSETCAR (range, make_number (c));
689 }
690 else if (! NILP (XCAR (range)))
691 {
692 XSETCDR (range, make_number (c - 1));
693 if (c_function)
694 (*c_function) (arg, range);
695 else
696 call2 (function, range, arg);
697 XSETCAR (range, Qnil);
698 }
699 if (c == stop)
700 {
701 if (c == temp_charset_work->max_char)
702 {
703 if (! NILP (XCAR (range)))
704 {
705 XSETCDR (range, make_number (c));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 }
711 break;
712 }
713 c = 0x1FFFF;
714 stop = temp_charset_work->max_char;
715 }
716 c++;
717 }
c542407d 718 UNGCPRO;
ecca2aad
KH
719}
720
4ed46869 721void
6f704c76
DN
722map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
723 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
4ed46869 724{
3263d5a2 725 Lisp_Object range;
374c5cfd 726 int partial;
3263d5a2 727
374c5cfd
KH
728 partial = (from > CHARSET_MIN_CODE (charset)
729 || to < CHARSET_MAX_CODE (charset));
730
3263d5a2 731 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 732 {
374c5cfd
KH
733 int from_idx = CODE_POINT_TO_INDEX (charset, from);
734 int to_idx = CODE_POINT_TO_INDEX (charset, to);
735 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
736 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
737
ecca2aad
KH
738 if (CHARSET_UNIFIED_P (charset))
739 {
740 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
741 load_charset (charset, 2);
742 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
743 map_char_table_for_charset (c_function, function,
744 CHARSET_DEUNIFIER (charset), arg,
745 partial ? charset : NULL, from, to);
746 else
747 map_charset_for_dump (c_function, function, arg, from, to);
748 }
749
374c5cfd 750 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 751 if (NILP (function))
5af5dd92 752 (*c_function) (arg, range);
3263d5a2
KH
753 else
754 call2 (function, range, arg);
c83ef371 755 }
3263d5a2
KH
756 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
757 {
758 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
759 load_charset (charset, 2);
760 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
761 map_char_table_for_charset (c_function, function,
762 CHARSET_ENCODER (charset), arg,
763 partial ? charset : NULL, from, to);
764 else
765 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 766 }
374c5cfd 767 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 768 {
374c5cfd
KH
769 Lisp_Object subset_info;
770 int offset;
771
772 subset_info = CHARSET_SUBSET (charset);
773 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
774 offset = XINT (AREF (subset_info, 3));
775 from -= offset;
776 if (from < XFASTINT (AREF (subset_info, 1)))
777 from = XFASTINT (AREF (subset_info, 1));
778 to -= offset;
779 if (to > XFASTINT (AREF (subset_info, 2)))
780 to = XFASTINT (AREF (subset_info, 2));
781 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 782 }
374c5cfd
KH
783 else /* i.e. CHARSET_METHOD_SUPERSET */
784 {
785 Lisp_Object parents;
4ed46869 786
374c5cfd
KH
787 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
788 parents = XCDR (parents))
bbf12bb3 789 {
374c5cfd
KH
790 int offset;
791 unsigned this_from, this_to;
792
793 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
794 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
795 this_from = from > offset ? from - offset : 0;
796 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
797 if (this_from < CHARSET_MIN_CODE (charset))
798 this_from = CHARSET_MIN_CODE (charset);
799 if (this_to > CHARSET_MAX_CODE (charset))
800 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
801 map_charset_chars (c_function, function, arg, charset,
802 this_from, this_to);
bbf12bb3 803 }
35e623fb 804 }
4ed46869
KH
805}
806
374c5cfd 807DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 808 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 809FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 810argument ARG.
4ed46869 811
374c5cfd
KH
812RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
813characters contained in CHARSET.
4ed46869 814
374c5cfd 815The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 816range of code points (in CHARSET) of target characters. */)
5842a27b 817 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
4ed46869 818{
374c5cfd 819 struct charset *cs;
16fed1fc 820 unsigned from, to;
4ed46869 821
374c5cfd
KH
822 CHECK_CHARSET_GET_CHARSET (charset, cs);
823 if (NILP (from_code))
16fed1fc 824 from = CHARSET_MIN_CODE (cs);
970b7474 825 else
4ed46869 826 {
970b7474
KH
827 CHECK_NATNUM (from_code);
828 from = XINT (from_code);
829 if (from < CHARSET_MIN_CODE (cs))
830 from = CHARSET_MIN_CODE (cs);
4ed46869 831 }
374c5cfd 832 if (NILP (to_code))
970b7474 833 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
834 else
835 {
970b7474
KH
836 CHECK_NATNUM (to_code);
837 to = XINT (to_code);
838 if (to > CHARSET_MAX_CODE (cs))
839 to = CHARSET_MAX_CODE (cs);
4ed46869 840 }
16fed1fc 841 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 842 return Qnil;
35e623fb 843}
4ed46869 844
4ed46869 845
3263d5a2
KH
846/* Define a charset according to the arguments. The Nth argument is
847 the Nth attribute of the charset (the last attribute `charset-id'
848 is not included). See the docstring of `define-charset' for the
849 detail. */
4ed46869 850
3263d5a2
KH
851DEFUN ("define-charset-internal", Fdefine_charset_internal,
852 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
853 doc: /* For internal use only.
854usage: (define-charset-internal ...) */)
5842a27b 855 (int nargs, Lisp_Object *args)
4ed46869 856{
3263d5a2
KH
857 /* Charset attr vector. */
858 Lisp_Object attrs;
859 Lisp_Object val;
860 unsigned hash_code;
861 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 862 int i, j;
3263d5a2
KH
863 struct charset charset;
864 int id;
865 int dimension;
866 int new_definition_p;
867 int nchars;
868
869 if (nargs != charset_arg_max)
870 return Fsignal (Qwrong_number_of_arguments,
871 Fcons (intern ("define-charset-internal"),
872 make_number (nargs)));
873
874 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
875
876 CHECK_SYMBOL (args[charset_arg_name]);
877 ASET (attrs, charset_name, args[charset_arg_name]);
878
879 val = args[charset_arg_code_space];
880 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 881 {
3263d5a2
KH
882 int min_byte, max_byte;
883
884 min_byte = XINT (Faref (val, make_number (i * 2)));
885 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
886 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
887 error ("Invalid :code-space value");
888 charset.code_space[i * 4] = min_byte;
889 charset.code_space[i * 4 + 1] = max_byte;
890 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
891 nchars *= charset.code_space[i * 4 + 2];
892 charset.code_space[i * 4 + 3] = nchars;
893 if (max_byte > 0)
894 dimension = i + 1;
895 }
4ed46869 896
3263d5a2
KH
897 val = args[charset_arg_dimension];
898 if (NILP (val))
899 charset.dimension = dimension;
900 else
4ed46869 901 {
3263d5a2
KH
902 CHECK_NATNUM (val);
903 charset.dimension = XINT (val);
904 if (charset.dimension < 1 || charset.dimension > 4)
905 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
906 }
907
3263d5a2
KH
908 charset.code_linear_p
909 = (charset.dimension == 1
910 || (charset.code_space[2] == 256
911 && (charset.dimension == 2
912 || (charset.code_space[6] == 256
913 && (charset.dimension == 3
914 || charset.code_space[10] == 256)))));
915
69f8de5b 916 if (! charset.code_linear_p)
4ed46869 917 {
69f8de5b 918 charset.code_space_mask = (unsigned char *) xmalloc (256);
72af86bd 919 memset (charset.code_space_mask, 0, 256);
69f8de5b
KH
920 for (i = 0; i < 4; i++)
921 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
922 j++)
923 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
924 }
925
3263d5a2 926 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 927
3263d5a2
KH
928 charset.min_code = (charset.code_space[0]
929 | (charset.code_space[4] << 8)
930 | (charset.code_space[8] << 16)
931 | (charset.code_space[12] << 24));
932 charset.max_code = (charset.code_space[1]
933 | (charset.code_space[5] << 8)
934 | (charset.code_space[9] << 16)
935 | (charset.code_space[13] << 24));
820ee249 936 charset.char_index_offset = 0;
8a73a704 937
820ee249
KH
938 val = args[charset_arg_min_code];
939 if (! NILP (val))
940 {
941 unsigned code;
fdb82f93 942
820ee249
KH
943 if (INTEGERP (val))
944 code = XINT (val);
945 else
946 {
947 CHECK_CONS (val);
8f924df7
KH
948 CHECK_NUMBER_CAR (val);
949 CHECK_NUMBER_CDR (val);
820ee249
KH
950 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
951 }
952 if (code < charset.min_code
953 || code > charset.max_code)
954 args_out_of_range_3 (make_number (charset.min_code),
955 make_number (charset.max_code), val);
956 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
957 charset.min_code = code;
958 }
3fac5a51 959
820ee249
KH
960 val = args[charset_arg_max_code];
961 if (! NILP (val))
3fac5a51 962 {
820ee249
KH
963 unsigned code;
964
965 if (INTEGERP (val))
966 code = XINT (val);
967 else
968 {
969 CHECK_CONS (val);
8f924df7
KH
970 CHECK_NUMBER_CAR (val);
971 CHECK_NUMBER_CDR (val);
820ee249
KH
972 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
973 }
974 if (code < charset.min_code
975 || code > charset.max_code)
976 args_out_of_range_3 (make_number (charset.min_code),
977 make_number (charset.max_code), val);
978 charset.max_code = code;
3fac5a51 979 }
3fac5a51 980
ecca2aad 981 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 982
3263d5a2
KH
983 val = args[charset_arg_invalid_code];
984 if (NILP (val))
985 {
986 if (charset.min_code > 0)
987 charset.invalid_code = 0;
bbf12bb3
KH
988 else
989 {
3263d5a2
KH
990 XSETINT (val, charset.max_code + 1);
991 if (XINT (val) == charset.max_code + 1)
992 charset.invalid_code = charset.max_code + 1;
993 else
994 error ("Attribute :invalid-code must be specified");
76d7b829 995 }
76d7b829 996 }
3263d5a2
KH
997 else
998 {
999 CHECK_NATNUM (val);
1000 charset.invalid_code = XFASTINT (val);
1001 }
4ed46869 1002
3263d5a2
KH
1003 val = args[charset_arg_iso_final];
1004 if (NILP (val))
1005 charset.iso_final = -1;
1006 else
1007 {
1008 CHECK_NUMBER (val);
1009 if (XINT (val) < '0' || XINT (val) > 127)
1010 error ("Invalid iso-final-char: %d", XINT (val));
1011 charset.iso_final = XINT (val);
1012 }
4ed46869 1013
3263d5a2
KH
1014 val = args[charset_arg_iso_revision];
1015 if (NILP (val))
1016 charset.iso_revision = -1;
1017 else
4ed46869 1018 {
3263d5a2
KH
1019 CHECK_NUMBER (val);
1020 if (XINT (val) > 63)
1021 args_out_of_range (make_number (63), val);
1022 charset.iso_revision = XINT (val);
4ed46869 1023 }
4ed46869 1024
3263d5a2
KH
1025 val = args[charset_arg_emacs_mule_id];
1026 if (NILP (val))
1027 charset.emacs_mule_id = -1;
4ed46869
KH
1028 else
1029 {
3263d5a2
KH
1030 CHECK_NATNUM (val);
1031 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1032 error ("Invalid emacs-mule-id: %d", XINT (val));
1033 charset.emacs_mule_id = XINT (val);
c83ef371 1034 }
f6302ac9 1035
3263d5a2 1036 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1037
3263d5a2 1038 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1039
3263d5a2
KH
1040 charset.unified_p = 0;
1041
72af86bd 1042 memset (charset.fast_map, 0, sizeof (charset.fast_map));
3263d5a2
KH
1043
1044 if (! NILP (args[charset_arg_code_offset]))
1045 {
1046 val = args[charset_arg_code_offset];
1047 CHECK_NUMBER (val);
1048
1049 charset.method = CHARSET_METHOD_OFFSET;
1050 charset.code_offset = XINT (val);
1051
1052 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1053 charset.min_char = i + charset.code_offset;
1054 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1055 charset.max_char = i + charset.code_offset;
1056 if (charset.max_char > MAX_CHAR)
1057 error ("Unsupported max char: %d", charset.max_char);
1058
f148205f
KH
1059 i = (charset.min_char >> 7) << 7;
1060 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1061 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1062 i = (i >> 12) << 12;
3263d5a2
KH
1063 for (; i <= charset.max_char; i += 0x1000)
1064 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1065 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1066 charset.ascii_compatible_p = 1;
3263d5a2
KH
1067 }
1068 else if (! NILP (args[charset_arg_map]))
1069 {
1070 val = args[charset_arg_map];
1071 ASET (attrs, charset_map, val);
ecca2aad 1072 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1073 }
374c5cfd 1074 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1075 {
374c5cfd
KH
1076 Lisp_Object parent;
1077 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1078 struct charset *parent_charset;
1079
1080 val = args[charset_arg_subset];
1081 parent = Fcar (val);
1082 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1083 parent_min_code = Fnth (make_number (1), val);
1084 CHECK_NATNUM (parent_min_code);
1085 parent_max_code = Fnth (make_number (2), val);
1086 CHECK_NATNUM (parent_max_code);
1087 parent_code_offset = Fnth (make_number (3), val);
1088 CHECK_NUMBER (parent_code_offset);
1089 val = Fmake_vector (make_number (4), Qnil);
1090 ASET (val, 0, make_number (parent_charset->id));
1091 ASET (val, 1, parent_min_code);
1092 ASET (val, 2, parent_max_code);
1093 ASET (val, 3, parent_code_offset);
1094 ASET (attrs, charset_subset, val);
1095
1096 charset.method = CHARSET_METHOD_SUBSET;
1097 /* Here, we just copy the parent's fast_map. It's not accurate,
1098 but at least it works for quickly detecting which character
1099 DOESN'T belong to this charset. */
1100 for (i = 0; i < 190; i++)
1101 charset.fast_map[i] = parent_charset->fast_map[i];
1102
1103 /* We also copy these for parents. */
1104 charset.min_char = parent_charset->min_char;
1105 charset.max_char = parent_charset->max_char;
1106 }
1107 else if (! NILP (args[charset_arg_superset]))
0282eb69 1108 {
374c5cfd
KH
1109 val = args[charset_arg_superset];
1110 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1111 val = Fcopy_sequence (val);
374c5cfd 1112 ASET (attrs, charset_superset, val);
3263d5a2
KH
1113
1114 charset.min_char = MAX_CHAR;
1115 charset.max_char = 0;
1116 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1117 {
3263d5a2
KH
1118 Lisp_Object elt, car_part, cdr_part;
1119 int this_id, offset;
1120 struct charset *this_charset;
2e344af3 1121
3263d5a2
KH
1122 elt = Fcar (val);
1123 if (CONSP (elt))
2e344af3 1124 {
3263d5a2
KH
1125 car_part = XCAR (elt);
1126 cdr_part = XCDR (elt);
1127 CHECK_CHARSET_GET_ID (car_part, this_id);
1128 CHECK_NUMBER (cdr_part);
1129 offset = XINT (cdr_part);
177c0ea7 1130 }
3263d5a2 1131 else
4ed46869 1132 {
3263d5a2
KH
1133 CHECK_CHARSET_GET_ID (elt, this_id);
1134 offset = 0;
4ed46869 1135 }
3263d5a2
KH
1136 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1137
1138 this_charset = CHARSET_FROM_ID (this_id);
1139 if (charset.min_char > this_charset->min_char)
1140 charset.min_char = this_charset->min_char;
1141 if (charset.max_char < this_charset->max_char)
1142 charset.max_char = this_charset->max_char;
1143 for (i = 0; i < 190; i++)
1144 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1145 }
0282eb69 1146 }
2e344af3 1147 else
3263d5a2 1148 error ("None of :code-offset, :map, :parents are specified");
05505664 1149
3263d5a2
KH
1150 val = args[charset_arg_unify_map];
1151 if (! NILP (val) && !STRINGP (val))
1152 CHECK_VECTOR (val);
1153 ASET (attrs, charset_unify_map, val);
05505664 1154
3263d5a2
KH
1155 CHECK_LIST (args[charset_arg_plist]);
1156 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1157
3263d5a2
KH
1158 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1159 &hash_code);
1160 if (charset.hash_index >= 0)
1161 {
1162 new_definition_p = 0;
4f65af01 1163 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1164 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1165 }
1a45ff10 1166 else
3263d5a2
KH
1167 {
1168 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1169 hash_code);
1170 if (charset_table_used == charset_table_size)
1171 {
2fe1edd1
KH
1172 struct charset *new_table
1173 = (struct charset *) xmalloc (sizeof (struct charset)
1174 * (charset_table_size + 16));
72af86bd
AS
1175 memcpy (new_table, charset_table,
1176 sizeof (struct charset) * charset_table_size);
2fe1edd1
KH
1177 charset_table_size += 16;
1178 charset_table = new_table;
3263d5a2
KH
1179 }
1180 id = charset_table_used++;
3263d5a2
KH
1181 new_definition_p = 1;
1182 }
2e344af3 1183
4f65af01 1184 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1185 charset.id = id;
1186 charset_table[id] = charset;
2e344af3 1187
ecca2aad 1188 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1189 {
1190 load_charset (&charset, 0);
1191 charset_table[id] = charset;
1192 }
ecca2aad 1193
3263d5a2 1194 if (charset.iso_final >= 0)
4ed46869 1195 {
3263d5a2
KH
1196 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1197 charset.iso_final) = id;
1198 if (new_definition_p)
1199 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1200 Fcons (make_number (id), Qnil));
7c7dceee
KH
1201 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1202 charset_jisx0201_roman = id;
1203 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1204 charset_jisx0208_1978 = id;
1205 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1206 charset_jisx0208 = id;
d32320c4
KH
1207 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1208 charset_ksc5601 = id;
4ed46869 1209 }
d0cf2d48 1210
3263d5a2 1211 if (charset.emacs_mule_id >= 0)
4ed46869 1212 {
3263d5a2 1213 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1214 if (charset.emacs_mule_id < 0xA0)
1215 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1216 else
1217 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1218 if (new_definition_p)
1219 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1220 Fcons (make_number (id), Qnil));
4ed46869
KH
1221 }
1222
3263d5a2
KH
1223 if (new_definition_p)
1224 {
1225 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1226 if (charset.supplementary_p)
1227 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1228 Fcons (make_number (id), Qnil));
1229 else
880820fe
KH
1230 {
1231 Lisp_Object tail;
1232
1233 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1234 {
1235 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1236
1237 if (cs->supplementary_p)
1238 break;
1239 }
1240 if (EQ (tail, Vcharset_ordered_list))
1241 Vcharset_ordered_list = Fcons (make_number (id),
1242 Vcharset_ordered_list);
1243 else if (NILP (tail))
1244 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1245 Fcons (make_number (id), Qnil));
1246 else
1247 {
1248 val = Fcons (XCAR (tail), XCDR (tail));
1249 XSETCDR (tail, val);
1250 XSETCAR (tail, make_number (id));
1251 }
1252 }
dbbb237d 1253 charset_ordered_list_tick++;
3263d5a2 1254 }
4ed46869 1255
3263d5a2 1256 return Qnil;
4ed46869
KH
1257}
1258
2fe1edd1
KH
1259
1260/* Same as Fdefine_charset_internal but arguments are more convenient
1261 to call from C (typically in syms_of_charset). This can define a
1262 charset of `offset' method only. Return the ID of the new
1263 charset. */
1264
1265static int
dd4c5104
DN
1266define_charset_internal (Lisp_Object name,
1267 int dimension,
675e2c69 1268 const unsigned char *code_space,
dd4c5104
DN
1269 unsigned min_code, unsigned max_code,
1270 int iso_final, int iso_revision, int emacs_mule_id,
1271 int ascii_compatible, int supplementary,
1272 int code_offset)
2fe1edd1
KH
1273{
1274 Lisp_Object args[charset_arg_max];
1275 Lisp_Object plist[14];
1276 Lisp_Object val;
1277 int i;
1278
1279 args[charset_arg_name] = name;
1280 args[charset_arg_dimension] = make_number (dimension);
1281 val = Fmake_vector (make_number (8), make_number (0));
1282 for (i = 0; i < 8; i++)
1283 ASET (val, i, make_number (code_space[i]));
1284 args[charset_arg_code_space] = val;
1285 args[charset_arg_min_code] = make_number (min_code);
1286 args[charset_arg_max_code] = make_number (max_code);
1287 args[charset_arg_iso_final]
1288 = (iso_final < 0 ? Qnil : make_number (iso_final));
1289 args[charset_arg_iso_revision] = make_number (iso_revision);
1290 args[charset_arg_emacs_mule_id]
1291 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1292 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1293 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1294 args[charset_arg_invalid_code] = Qnil;
1295 args[charset_arg_code_offset] = make_number (code_offset);
1296 args[charset_arg_map] = Qnil;
1297 args[charset_arg_subset] = Qnil;
1298 args[charset_arg_superset] = Qnil;
1299 args[charset_arg_unify_map] = Qnil;
1300
d67b4f80 1301 plist[0] = intern_c_string (":name");
2fe1edd1 1302 plist[1] = args[charset_arg_name];
d67b4f80 1303 plist[2] = intern_c_string (":dimension");
2fe1edd1 1304 plist[3] = args[charset_arg_dimension];
d67b4f80 1305 plist[4] = intern_c_string (":code-space");
2fe1edd1 1306 plist[5] = args[charset_arg_code_space];
d67b4f80 1307 plist[6] = intern_c_string (":iso-final-char");
2fe1edd1 1308 plist[7] = args[charset_arg_iso_final];
d67b4f80 1309 plist[8] = intern_c_string (":emacs-mule-id");
2fe1edd1 1310 plist[9] = args[charset_arg_emacs_mule_id];
d67b4f80 1311 plist[10] = intern_c_string (":ascii-compatible-p");
2fe1edd1 1312 plist[11] = args[charset_arg_ascii_compatible_p];
d67b4f80 1313 plist[12] = intern_c_string (":code-offset");
2fe1edd1
KH
1314 plist[13] = args[charset_arg_code_offset];
1315
1316 args[charset_arg_plist] = Flist (14, plist);
1317 Fdefine_charset_internal (charset_arg_max, args);
1318
1319 return XINT (CHARSET_SYMBOL_ID (name));
1320}
1321
1322
3263d5a2
KH
1323DEFUN ("define-charset-alias", Fdefine_charset_alias,
1324 Sdefine_charset_alias, 2, 2, 0,
1325 doc: /* Define ALIAS as an alias for charset CHARSET. */)
5842a27b 1326 (Lisp_Object alias, Lisp_Object charset)
4ed46869 1327{
3263d5a2
KH
1328 Lisp_Object attr;
1329
1330 CHECK_CHARSET_GET_ATTR (charset, attr);
1331 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1332 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1333 return Qnil;
1334}
4ed46869 1335
4ed46869 1336
3263d5a2 1337DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1338 doc: /* Return the property list of CHARSET. */)
5842a27b 1339 (Lisp_Object charset)
3263d5a2
KH
1340{
1341 Lisp_Object attrs;
1342
1343 CHECK_CHARSET_GET_ATTR (charset, attrs);
1344 return CHARSET_ATTR_PLIST (attrs);
1345}
1346
1347
1348DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1349 doc: /* Set CHARSET's property list to PLIST. */)
5842a27b 1350 (Lisp_Object charset, Lisp_Object plist)
3263d5a2
KH
1351{
1352 Lisp_Object attrs;
1353
1354 CHECK_CHARSET_GET_ATTR (charset, attrs);
1355 CHARSET_ATTR_PLIST (attrs) = plist;
1356 return plist;
1357}
1358
1359
dbbb237d 1360DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1361 doc: /* Unify characters of CHARSET with Unicode.
1362This means reading the relevant file and installing the table defined
dbbb237d
KH
1363by CHARSET's `:unify-map' property.
1364
64165ae2
DL
1365Optional second arg UNIFY-MAP is a file name string or a vector. It has
1366the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1367`define-charset' (which see).
1368
1369Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
5842a27b 1370 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
8a73a704 1371{
3263d5a2
KH
1372 int id;
1373 struct charset *cs;
8f924df7 1374
3263d5a2
KH
1375 CHECK_CHARSET_GET_ID (charset, id);
1376 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1377 if (NILP (deunify)
1378 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1379 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1380 return Qnil;
dbbb237d 1381
3263d5a2 1382 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1383 if (NILP (deunify))
1384 {
ecca2aad
KH
1385 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1386 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1387 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1388 if (NILP (unify_map))
1389 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1390 else
ecca2aad
KH
1391 {
1392 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1393 signal_error ("Bad unify-map", unify_map);
1394 CHARSET_UNIFY_MAP (cs) = unify_map;
1395 }
1396 if (NILP (Vchar_unify_table))
1397 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1398 char_table_set_range (Vchar_unify_table,
1399 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1400 CHARSET_UNIFIED_P (cs) = 1;
1401 }
1402 else if (CHAR_TABLE_P (Vchar_unify_table))
1403 {
1404 int min_code = CHARSET_MIN_CODE (cs);
1405 int max_code = CHARSET_MAX_CODE (cs);
1406 int min_char = DECODE_CHAR (cs, min_code);
1407 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1408
dbbb237d
KH
1409 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1410 }
8f924df7 1411
3263d5a2 1412 return Qnil;
8a73a704
KH
1413}
1414
3fac5a51
KH
1415DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1416 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1417 doc: /*
d0cf2d48 1418Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1419DIMENSION is the number of bytes to represent a character: 1 or 2.
1420CHARS is the number of characters in a dimension: 94 or 96.
1421
1422This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1423If there's no unused final char for the specified kind of charset,
fdb82f93 1424return nil. */)
5842a27b 1425 (Lisp_Object dimension, Lisp_Object chars)
3fac5a51
KH
1426{
1427 int final_char;
1428
b7826503
PJ
1429 CHECK_NUMBER (dimension);
1430 CHECK_NUMBER (chars);
3263d5a2
KH
1431 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1432 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1433 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1434 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1435 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1436 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1437 break;
3fac5a51
KH
1438 return (final_char <= '?' ? make_number (final_char) : Qnil);
1439}
1440
3263d5a2 1441static void
971de7fb 1442check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1443{
3263d5a2
KH
1444 CHECK_NATNUM (dimension);
1445 CHECK_NATNUM (chars);
1446 CHECK_NATNUM (final_char);
4ed46869 1447
3263d5a2
KH
1448 if (XINT (dimension) > 3)
1449 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1450 if (XINT (chars) != 94 && XINT (chars) != 96)
1451 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1452 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1453 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1454}
1455
1456
1457DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1458 4, 4, 0,
cefd8c4f
KH
1459 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1460
1461On decoding by an ISO-2022 base coding system, when a charset
1462specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1463if CHARSET is designated instead. */)
5842a27b 1464 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
3263d5a2
KH
1465{
1466 int id;
82215ce9 1467 int chars_flag;
4ed46869 1468
3263d5a2
KH
1469 CHECK_CHARSET_GET_ID (charset, id);
1470 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1471 chars_flag = XINT (chars) == 96;
1472 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1473 return Qnil;
1474}
1475
3263d5a2 1476
2e344af3
KH
1477/* Return information about charsets in the text at PTR of NBYTES
1478 bytes, which are NCHARS characters. The value is:
f6302ac9 1479
cfe34140 1480 0: Each character is represented by one byte. This is always
3263d5a2
KH
1481 true for a unibyte string. For a multibyte string, true if
1482 it contains only ASCII characters.
1483
28c026cd
DL
1484 1: No charsets other than ascii, control-1, and latin-1 are
1485 found.
1d67c29b 1486
3263d5a2
KH
1487 2: Otherwise.
1488*/
4ed46869
KH
1489
1490int
971de7fb 1491string_xstring_p (Lisp_Object string)
4ed46869 1492{
8f924df7
KH
1493 const unsigned char *p = SDATA (string);
1494 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1495
8f924df7 1496 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1497 return 0;
1498
3263d5a2 1499 while (p < endp)
0282eb69 1500 {
3263d5a2 1501 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1502
3cc67a4d 1503 if (c >= 0x100)
3263d5a2 1504 return 2;
0282eb69 1505 }
3263d5a2
KH
1506 return 1;
1507}
05505664 1508
05505664 1509
3263d5a2 1510/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1511
3cc67a4d
KH
1512 CHARSETS is a vector. If Nth element is non-nil, it means the
1513 charset whose id is N is already found.
2e344af3 1514
3263d5a2 1515 It may lookup a translation table TABLE if supplied. */
2e344af3 1516
3263d5a2 1517static void
971de7fb 1518find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
3263d5a2 1519{
dbbb237d 1520 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1521
1522 if (nchars == nbytes)
3263d5a2 1523 {
3cc67a4d
KH
1524 if (multibyte)
1525 ASET (charsets, charset_ascii, Qt);
1526 else
1527 while (ptr < pend)
1528 {
1529 int c = *ptr++;
1530
1531 if (!NILP (table))
1532 c = translate_char (table, c);
1533 if (ASCII_BYTE_P (c))
1534 ASET (charsets, charset_ascii, Qt);
1535 else
1536 ASET (charsets, charset_eight_bit, Qt);
1537 }
1538 }
1539 else
1540 {
1541 while (ptr < pend)
3263d5a2 1542 {
3cc67a4d
KH
1543 int c = STRING_CHAR_ADVANCE (ptr);
1544 struct charset *charset;
3263d5a2 1545
3cc67a4d
KH
1546 if (!NILP (table))
1547 c = translate_char (table, c);
1548 charset = CHAR_CHARSET (c);
1549 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1550 }
4ed46869 1551 }
4ed46869
KH
1552}
1553
1554DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1555 2, 3, 0,
fdb82f93
PJ
1556 doc: /* Return a list of charsets in the region between BEG and END.
1557BEG and END are buffer positions.
1558Optional arg TABLE if non-nil is a translation table to look up.
1559
fdb82f93
PJ
1560If the current buffer is unibyte, the returned list may contain
1561only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1562 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
4ed46869 1563{
3263d5a2 1564 Lisp_Object charsets;
42ca828e
DL
1565 EMACS_INT from, from_byte, to, stop, stop_byte;
1566 int i;
4ed46869 1567 Lisp_Object val;
3cc67a4d 1568 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1569
1570 validate_region (&beg, &end);
1571 from = XFASTINT (beg);
1572 stop = to = XFASTINT (end);
6ae1f27e 1573
4ed46869 1574 if (from < GPT && GPT < to)
6ae1f27e
RS
1575 {
1576 stop = GPT;
1577 stop_byte = GPT_BYTE;
1578 }
1579 else
1580 stop_byte = CHAR_TO_BYTE (stop);
1581
1582 from_byte = CHAR_TO_BYTE (from);
1583
3263d5a2 1584 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1585 while (1)
1586 {
3263d5a2 1587 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1588 stop_byte - from_byte, charsets, table,
1589 multibyte);
4ed46869 1590 if (stop < to)
6ae1f27e
RS
1591 {
1592 from = stop, from_byte = stop_byte;
1593 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1594 }
4ed46869
KH
1595 else
1596 break;
1597 }
6ae1f27e 1598
4ed46869 1599 val = Qnil;
3263d5a2 1600 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1601 if (!NILP (AREF (charsets, i)))
3263d5a2 1602 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1603 return val;
1604}
1605
1606DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1607 1, 2, 0,
fdb82f93
PJ
1608 doc: /* Return a list of charsets in STR.
1609Optional arg TABLE if non-nil is a translation table to look up.
1610
fdb82f93 1611If STR is unibyte, the returned list may contain
3263d5a2 1612only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
5842a27b 1613 (Lisp_Object str, Lisp_Object table)
4ed46869 1614{
3263d5a2 1615 Lisp_Object charsets;
4ed46869
KH
1616 int i;
1617 Lisp_Object val;
1618
b7826503 1619 CHECK_STRING (str);
87b089ad 1620
3263d5a2 1621 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1622 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1623 charsets, table,
1624 STRING_MULTIBYTE (str));
4ed46869 1625 val = Qnil;
3263d5a2 1626 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1627 if (!NILP (AREF (charsets, i)))
3263d5a2 1628 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1629 return val;
1630}
2e344af3 1631
4ed46869 1632\f
3263d5a2 1633
ecca2aad
KH
1634/* Return a unified character code for C (>= 0x110000). VAL is a
1635 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1636 charset symbol. */
1637int
971de7fb 1638maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1639{
1640 struct charset *charset;
1641
1642 if (INTEGERP (val))
1643 return XINT (val);
1644 if (NILP (val))
1645 return c;
1646
1647 CHECK_CHARSET_GET_CHARSET (val, charset);
1648 load_charset (charset, 1);
1649 if (! inhibit_load_charset_map)
1650 {
1651 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1652 if (! NILP (val))
1653 c = XINT (val);
1654 }
1655 else
1656 {
1657 int code_index = c - CHARSET_CODE_OFFSET (charset);
1658 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1659
1660 if (unified > 0)
1661 c = unified;
1662 }
1663 return c;
1664}
1665
1666
3263d5a2
KH
1667/* Return a character correponding to the code-point CODE of
1668 CHARSET. */
1669
1670int
971de7fb 1671decode_char (struct charset *charset, unsigned int code)
4ed46869 1672{
3263d5a2
KH
1673 int c, char_index;
1674 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1675
3263d5a2
KH
1676 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1677 return -1;
4ed46869 1678
374c5cfd 1679 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1680 {
374c5cfd
KH
1681 Lisp_Object subset_info;
1682
1683 subset_info = CHARSET_SUBSET (charset);
1684 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1685 code -= XINT (AREF (subset_info, 3));
1686 if (code < XFASTINT (AREF (subset_info, 1))
1687 || code > XFASTINT (AREF (subset_info, 2)))
1688 c = -1;
1689 else
1690 c = DECODE_CHAR (charset, code);
2e344af3 1691 }
374c5cfd 1692 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1693 {
3263d5a2 1694 Lisp_Object parents;
4ed46869 1695
374c5cfd 1696 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1697 c = -1;
1698 for (; CONSP (parents); parents = XCDR (parents))
1699 {
1700 int id = XINT (XCAR (XCAR (parents)));
1701 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1702 unsigned this_code = code - code_offset;
4ed46869 1703
3263d5a2
KH
1704 charset = CHARSET_FROM_ID (id);
1705 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1706 break;
1707 }
1708 }
1709 else
ac4137cc 1710 {
3263d5a2 1711 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1712 if (char_index < 0)
1713 return -1;
4ed46869 1714
3263d5a2 1715 if (method == CHARSET_METHOD_MAP)
ac4137cc 1716 {
3263d5a2 1717 Lisp_Object decoder;
4ed46869 1718
3263d5a2
KH
1719 decoder = CHARSET_DECODER (charset);
1720 if (! VECTORP (decoder))
ecca2aad
KH
1721 {
1722 load_charset (charset, 1);
1723 decoder = CHARSET_DECODER (charset);
1724 }
1725 if (VECTORP (decoder))
1726 c = XINT (AREF (decoder, char_index));
1727 else
1728 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1729 }
ecca2aad 1730 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1731 {
3263d5a2 1732 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1733 if (CHARSET_UNIFIED_P (charset)
1734 && c > MAX_UNICODE_CHAR)
1735 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1736 }
1737 }
4ed46869 1738
3263d5a2 1739 return c;
90d7b74e
KH
1740}
1741
374c5cfd
KH
1742/* Variable used temporarily by the macro ENCODE_CHAR. */
1743Lisp_Object charset_work;
4ed46869 1744
3263d5a2 1745/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1746 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1747 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1748
3263d5a2 1749unsigned
971de7fb 1750encode_char (struct charset *charset, int c)
9d3d8cba 1751{
3263d5a2
KH
1752 unsigned code;
1753 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1754
3263d5a2 1755 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1756 {
6809ca75 1757 Lisp_Object deunifier;
ecca2aad 1758 int code_index = -1;
4ed46869 1759
3263d5a2
KH
1760 deunifier = CHARSET_DEUNIFIER (charset);
1761 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1762 {
ecca2aad 1763 load_charset (charset, 2);
3263d5a2 1764 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1765 }
ecca2aad
KH
1766 if (CHAR_TABLE_P (deunifier))
1767 {
1768 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1769
1770 if (INTEGERP (deunified))
1771 code_index = XINT (deunified);
1772 }
1773 else
1774 {
1775 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1776 }
1777 if (code_index >= 0)
1778 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1779 }
9d3d8cba 1780
374c5cfd
KH
1781 if (method == CHARSET_METHOD_SUBSET)
1782 {
1783 Lisp_Object subset_info;
1784 struct charset *this_charset;
1785
1786 subset_info = CHARSET_SUBSET (charset);
1787 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1788 code = ENCODE_CHAR (this_charset, c);
1789 if (code == CHARSET_INVALID_CODE (this_charset)
1790 || code < XFASTINT (AREF (subset_info, 1))
1791 || code > XFASTINT (AREF (subset_info, 2)))
1792 return CHARSET_INVALID_CODE (charset);
1793 code += XINT (AREF (subset_info, 3));
1794 return code;
1795 }
9d3d8cba 1796
374c5cfd 1797 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1798 {
3263d5a2 1799 Lisp_Object parents;
d2665018 1800
374c5cfd 1801 parents = CHARSET_SUPERSET (charset);
3263d5a2 1802 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1803 {
3263d5a2
KH
1804 int id = XINT (XCAR (XCAR (parents)));
1805 int code_offset = XINT (XCDR (XCAR (parents)));
1806 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1807
3263d5a2 1808 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1809 if (code != CHARSET_INVALID_CODE (this_charset))
1810 return code + code_offset;
beeedaad 1811 }
3263d5a2
KH
1812 return CHARSET_INVALID_CODE (charset);
1813 }
1bcc1567 1814
15c85a88
KH
1815 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1816 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1817 return CHARSET_INVALID_CODE (charset);
1bcc1567 1818
3263d5a2 1819 if (method == CHARSET_METHOD_MAP)
3f62427c 1820 {
3263d5a2 1821 Lisp_Object encoder;
beeedaad 1822 Lisp_Object val;
9b6a601f 1823
3263d5a2
KH
1824 encoder = CHARSET_ENCODER (charset);
1825 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1826 {
1827 load_charset (charset, 2);
1828 encoder = CHARSET_ENCODER (charset);
1829 }
1830 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1831 {
1832 val = CHAR_TABLE_REF (encoder, c);
1833 if (NILP (val))
1834 return CHARSET_INVALID_CODE (charset);
1835 code = XINT (val);
1836 if (! CHARSET_COMPACT_CODES_P (charset))
1837 code = INDEX_TO_CODE_POINT (charset, code);
1838 }
1839 else
1840 {
1841 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1842 code = INDEX_TO_CODE_POINT (charset, code);
1843 }
3263d5a2 1844 }
820ee249 1845 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1846 {
ecca2aad
KH
1847 int code_index = c - CHARSET_CODE_OFFSET (charset);
1848
1849 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1850 }
8ac5a9cc 1851
3263d5a2 1852 return code;
4ed46869
KH
1853}
1854
4ed46869 1855
3263d5a2
KH
1856DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1857 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1858Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1859
3263d5a2 1860CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1861
3263d5a2 1862Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1863and CODE-POINT to a character. Currently not supported and just ignored. */)
5842a27b 1864 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
4ed46869 1865{
3263d5a2
KH
1866 int c, id;
1867 unsigned code;
1868 struct charset *charsetp;
859f2b3c 1869
3263d5a2
KH
1870 CHECK_CHARSET_GET_ID (charset, id);
1871 if (CONSP (code_point))
1872 {
8f924df7
KH
1873 CHECK_NATNUM_CAR (code_point);
1874 CHECK_NATNUM_CDR (code_point);
69f8de5b 1875 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1876 }
4ed46869
KH
1877 else
1878 {
3263d5a2
KH
1879 CHECK_NATNUM (code_point);
1880 code = XINT (code_point);
4ed46869 1881 }
3263d5a2
KH
1882 charsetp = CHARSET_FROM_ID (id);
1883 c = DECODE_CHAR (charsetp, code);
1884 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1885}
1886
859f2b3c 1887
3263d5a2
KH
1888DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1889 doc: /* Encode the character CH into a code-point of CHARSET.
1890Return nil if CHARSET doesn't include CH.
17e7ef1b 1891
d0cf2d48 1892Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2 1893code-point in CCS. Currently not supported and just ignored. */)
5842a27b 1894 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
4ed46869 1895{
16fed1fc 1896 int id;
3263d5a2
KH
1897 unsigned code;
1898 struct charset *charsetp;
046b1f03 1899
3263d5a2
KH
1900 CHECK_CHARSET_GET_ID (charset, id);
1901 CHECK_NATNUM (ch);
3263d5a2 1902 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1903 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1904 if (code == CHARSET_INVALID_CODE (charsetp))
1905 return Qnil;
1906 if (code > 0x7FFFFFF)
1907 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1908 return make_number (code);
beeedaad
KH
1909}
1910
beeedaad 1911
b121a744
KH
1912DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1913 doc:
1914 /* Return a character of CHARSET whose position codes are CODEn.
1915
1916CODE1 through CODE4 are optional, but if you don't supply sufficient
1917position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1918is specified. */)
5842a27b 1919 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
beeedaad 1920{
3263d5a2
KH
1921 int id, dimension;
1922 struct charset *charsetp;
b121a744
KH
1923 unsigned code;
1924 int c;
87b089ad 1925
3263d5a2
KH
1926 CHECK_CHARSET_GET_ID (charset, id);
1927 charsetp = CHARSET_FROM_ID (id);
4ed46869 1928
b121a744
KH
1929 dimension = CHARSET_DIMENSION (charsetp);
1930 if (NILP (code1))
d47073ca
KH
1931 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1932 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1933 else
859f2b3c 1934 {
b121a744
KH
1935 CHECK_NATNUM (code1);
1936 if (XFASTINT (code1) >= 0x100)
1937 args_out_of_range (make_number (0xFF), code1);
1938 code = XFASTINT (code1);
859f2b3c 1939
b0a1e45e 1940 if (dimension > 1)
beeedaad 1941 {
b121a744 1942 code <<= 8;
b0a1e45e
KH
1943 if (NILP (code2))
1944 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1945 else
b121a744 1946 {
b0a1e45e
KH
1947 CHECK_NATNUM (code2);
1948 if (XFASTINT (code2) >= 0x100)
1949 args_out_of_range (make_number (0xFF), code2);
1950 code |= XFASTINT (code2);
b121a744 1951 }
99529c2c 1952
b0a1e45e 1953 if (dimension > 2)
b121a744
KH
1954 {
1955 code <<= 8;
b0a1e45e
KH
1956 if (NILP (code3))
1957 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1958 else
1959 {
b0a1e45e
KH
1960 CHECK_NATNUM (code3);
1961 if (XFASTINT (code3) >= 0x100)
1962 args_out_of_range (make_number (0xFF), code3);
1963 code |= XFASTINT (code3);
1964 }
1965
1966 if (dimension > 3)
1967 {
1968 code <<= 8;
1969 if (NILP (code4))
1970 code |= charsetp->code_space[0];
1971 else
1972 {
1973 CHECK_NATNUM (code4);
1974 if (XFASTINT (code4) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code4);
1976 code |= XFASTINT (code4);
1977 }
b121a744
KH
1978 }
1979 }
beeedaad 1980 }
859f2b3c 1981 }
beeedaad 1982
b121a744
KH
1983 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1984 code &= 0x7F7F7F7F;
1985 c = DECODE_CHAR (charsetp, code);
1986 if (c < 0)
1987 error ("Invalid code(s)");
3263d5a2 1988 return make_number (c);
4ed46869
KH
1989}
1990
beeedaad 1991
3263d5a2
KH
1992/* Return the first charset in CHARSET_LIST that contains C.
1993 CHARSET_LIST is a list of charset IDs. If it is nil, use
1994 Vcharset_ordered_list. */
beeedaad 1995
3263d5a2 1996struct charset *
971de7fb 1997char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 1998{
8a1816bb
KH
1999 int maybe_null = 0;
2000
3263d5a2
KH
2001 if (NILP (charset_list))
2002 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2003 else
2004 maybe_null = 1;
beeedaad 2005
6c652beb 2006 while (CONSP (charset_list))
2e344af3 2007 {
3263d5a2
KH
2008 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2009 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2010
3263d5a2 2011 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2012 {
3263d5a2
KH
2013 if (code_return)
2014 *code_return = code;
2015 return charset;
3f62427c 2016 }
3263d5a2 2017 charset_list = XCDR (charset_list);
c0be27fd
KH
2018 if (! maybe_null
2019 && c <= MAX_UNICODE_CHAR
2020 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 2021 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2022 }
8a1816bb
KH
2023 return (maybe_null ? NULL
2024 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2025 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2026}
2027
2e344af3 2028
3263d5a2 2029DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2030 doc:
d0cf2d48 2031 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2032The charset is decided by the current priority order of charsets.
2033A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2034CH in the charset. */)
5842a27b 2035 (Lisp_Object ch)
4ed46869 2036{
3263d5a2
KH
2037 struct charset *charset;
2038 int c, dimension;
2039 unsigned code;
4ed46869
KH
2040 Lisp_Object val;
2041
3263d5a2
KH
2042 CHECK_CHARACTER (ch);
2043 c = XFASTINT (ch);
2044 charset = CHAR_CHARSET (c);
2045 if (! charset)
3cc67a4d 2046 abort ();
3263d5a2
KH
2047 code = ENCODE_CHAR (charset, c);
2048 if (code == CHARSET_INVALID_CODE (charset))
2049 abort ();
2050 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2051 for (val = Qnil; dimension > 0; dimension--)
2052 {
2053 val = Fcons (make_number (code & 0xFF), val);
2054 code >>= 8;
2055 }
3263d5a2 2056 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2057}
2058
740f080d 2059
4cb75c4b
KH
2060DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2061 doc: /* Return the charset of highest priority that contains CH.
2062If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2063from which to find the charset. It may also be a coding system. In
2064that case, find the charset from what supported by that coding system. */)
5842a27b 2065 (Lisp_Object ch, Lisp_Object restriction)
4ed46869 2066{
3263d5a2 2067 struct charset *charset;
4ed46869 2068
3263d5a2 2069 CHECK_CHARACTER (ch);
4cb75c4b
KH
2070 if (NILP (restriction))
2071 charset = CHAR_CHARSET (XINT (ch));
2072 else
2073 {
4cb75c4b
KH
2074 if (CONSP (restriction))
2075 {
7a84eee5
KH
2076 int c = XFASTINT (ch);
2077
2078 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2079 {
7a84eee5 2080 struct charset *charset;
4cb75c4b 2081
7a84eee5
KH
2082 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2083 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2084 return XCAR (restriction);
4cb75c4b 2085 }
7a84eee5 2086 return Qnil;
4cb75c4b 2087 }
7a84eee5
KH
2088 restriction = coding_system_charset_list (restriction);
2089 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2090 if (! charset)
2091 return Qnil;
2092 }
3263d5a2 2093 return (CHARSET_NAME (charset));
4ed46869
KH
2094}
2095
17e7ef1b 2096
3263d5a2
KH
2097DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2098 doc: /*
2099Return charset of a character in the current buffer at position POS.
2100If POS is nil, it defauls to the current point.
2101If POS is out of range, the value is nil. */)
5842a27b 2102 (Lisp_Object pos)
2e344af3 2103{
3263d5a2
KH
2104 Lisp_Object ch;
2105 struct charset *charset;
046b1f03 2106
3263d5a2
KH
2107 ch = Fchar_after (pos);
2108 if (! INTEGERP (ch))
2109 return ch;
2110 charset = CHAR_CHARSET (XINT (ch));
2111 return (CHARSET_NAME (charset));
6ae1f27e 2112}
9036eb45 2113
87b089ad 2114
3263d5a2
KH
2115DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2116 doc: /*
2117Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2118
2119ISO 2022's designation sequence (escape sequence) distinguishes charsets
2120by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2121whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2122See the documentation of the function `charset-info' for the meanings of
2123DIMENSION, CHARS, and FINAL-CHAR. */)
5842a27b 2124 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
6ae1f27e 2125{
3263d5a2 2126 int id;
82215ce9 2127 int chars_flag;
a8a35e61 2128
3263d5a2 2129 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2130 chars_flag = XFASTINT (chars) == 96;
2131 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2132 XFASTINT (final_char));
2133 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2134}
2135
87b089ad 2136
3263d5a2
KH
2137DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2138 0, 0, 0,
2139 doc: /*
ecca2aad
KH
2140Internal use only.
2141Clear temporary charset mapping tables.
2142It should be called only from temacs invoked for dumping. */)
5842a27b 2143 (void)
87b089ad 2144{
ecca2aad 2145 if (temp_charset_work)
87b089ad 2146 {
ecca2aad
KH
2147 free (temp_charset_work);
2148 temp_charset_work = NULL;
2e344af3 2149 }
2e344af3 2150
ecca2aad
KH
2151 if (CHAR_TABLE_P (Vchar_unify_table))
2152 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2153
3263d5a2 2154 return Qnil;
740f080d
KH
2155}
2156
8ddf5e57
DL
2157DEFUN ("charset-priority-list", Fcharset_priority_list,
2158 Scharset_priority_list, 0, 1, 0,
2159 doc: /* Return the list of charsets ordered by priority.
2160HIGHESTP non-nil means just return the highest priority one. */)
5842a27b 2161 (Lisp_Object highestp)
2e344af3 2162{
8ddf5e57 2163 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2164
8ddf5e57 2165 if (!NILP (highestp))
16fed1fc 2166 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2167
8ddf5e57 2168 while (!NILP (list))
2e344af3 2169 {
16fed1fc 2170 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2171 list = XCDR (list);
2e344af3 2172 }
8ddf5e57 2173 return Fnreverse (val);
2e344af3
KH
2174}
2175
8ddf5e57
DL
2176DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2177 1, MANY, 0,
2178 doc: /* Assign higher priority to the charsets given as arguments.
2179usage: (set-charset-priority &rest charsets) */)
5842a27b 2180 (int nargs, Lisp_Object *args)
4ed46869 2181{
af7c60ca 2182 Lisp_Object new_head, old_list, arglist[2];
321c819c 2183 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2184 int i, id;
4ed46869 2185
8ddf5e57 2186 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2187 new_head = Qnil;
8ddf5e57 2188 for (i = 0; i < nargs; i++)
4ed46869 2189 {
8ddf5e57 2190 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2191 if (! NILP (Fmemq (make_number (id), old_list)))
2192 {
2193 old_list = Fdelq (make_number (id), old_list);
2194 new_head = Fcons (make_number (id), new_head);
2195 }
5729c92f 2196 }
8ddf5e57 2197 arglist[0] = Fnreverse (new_head);
6a9c90ec 2198 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2199 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2200 charset_ordered_list_tick++;
5729c92f 2201
6809ca75 2202 charset_unibyte = -1;
321c819c 2203 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2204 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2205 {
e77415b0 2206 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2207 list_2022 = Fcons (XCAR (old_list), list_2022);
2208 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2209 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2210 if (charset_unibyte < 0)
2211 {
2212 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2213
2214 if (CHARSET_DIMENSION (charset) == 1
2215 && CHARSET_ASCII_COMPATIBLE_P (charset)
2216 && CHARSET_MAX_CHAR (charset) >= 0x80)
2217 charset_unibyte = CHARSET_ID (charset);
2218 }
4ed46869 2219 }
321c819c
KH
2220 Viso_2022_charset_list = Fnreverse (list_2022);
2221 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2222 if (charset_unibyte < 0)
2223 charset_unibyte = charset_iso_8859_1;
4ed46869 2224
8ddf5e57 2225 return Qnil;
4ed46869
KH
2226}
2227
d5b33309
KH
2228DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2229 0, 1, 0,
2230 doc: /* Internal use only.
2231Return charset identification number of CHARSET. */)
5842a27b 2232 (Lisp_Object charset)
4ed46869 2233{
d5b33309 2234 int id;
4ed46869 2235
d5b33309
KH
2236 CHECK_CHARSET_GET_ID (charset, id);
2237 return make_number (id);
4ed46869
KH
2238}
2239
7a84eee5
KH
2240struct charset_sort_data
2241{
2242 Lisp_Object charset;
2243 int id;
2244 int priority;
2245};
2246
2247static int
2248charset_compare (const void *d1, const void *d2)
2249{
2250 const struct charset_sort_data *data1 = d1, *data2 = d2;
2251 return (data1->priority - data2->priority);
2252}
2253
2254DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2255 doc: /* Sort charset list CHARSETS by a priority of each charset.
2256Return the sorted list. CHARSETS is modified by side effects.
2257See also `charset-priority-list' and `set-charset-priority'. */)
2258 (Lisp_Object charsets)
2259{
2260 Lisp_Object len = Flength (charsets);
2261 int n = XFASTINT (len), i, j, done;
2262 Lisp_Object tail, elt, attrs;
2263 struct charset_sort_data *sort_data;
2264 int id, min_id, max_id;
2265 USE_SAFE_ALLOCA;
2266
2267 if (n == 0)
2268 return Qnil;
2269 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2270 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2271 {
2272 elt = XCAR (tail);
2273 CHECK_CHARSET_GET_ATTR (elt, attrs);
2274 sort_data[i].charset = elt;
2275 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2276 if (i == 0)
2277 min_id = max_id = id;
2278 else if (id < min_id)
2279 min_id = id;
2280 else if (id > max_id)
2281 max_id = id;
2282 }
2283 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2284 done < n && CONSP (tail); tail = XCDR (tail), i++)
2285 {
2286 elt = XCAR (tail);
2287 id = XFASTINT (elt);
2288 if (id >= min_id && id <= max_id)
2289 for (j = 0; j < n; j++)
2290 if (sort_data[j].id == id)
2291 {
2292 sort_data[j].priority = i;
2293 done++;
2294 }
2295 }
2296 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2297 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2298 XSETCAR (tail, sort_data[i].charset);
2299 SAFE_FREE ();
2300 return charsets;
2301}
2302
4ed46869 2303\f
3263d5a2 2304void
971de7fb 2305init_charset (void)
4ed46869 2306{
c8f94403
GM
2307 Lisp_Object tempdir;
2308 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
6809ca75 2309 if (access ((char *) SDATA (tempdir), 0) < 0)
c8f94403
GM
2310 {
2311 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2312Emacs will not function correctly without the character map files.\n\
2313Please check your installation!\n",
2314 tempdir);
2315 /* TODO should this be a fatal error? (Bug#909) */
2316 }
2317
2318 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2319}
2320
4ed46869 2321
dfcf069d 2322void
971de7fb 2323init_charset_once (void)
4ed46869
KH
2324{
2325 int i, j, k;
2326
3263d5a2
KH
2327 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2328 for (j = 0; j < ISO_MAX_CHARS; j++)
2329 for (k = 0; k < ISO_MAX_FINAL; k++)
2330 iso_charset_table[i][j][k] = -1;
4ed46869 2331
60383934 2332 for (i = 0; i < 256; i++)
3263d5a2 2333 emacs_mule_charset[i] = NULL;
4ed46869 2334
7c7dceee
KH
2335 charset_jisx0201_roman = -1;
2336 charset_jisx0208_1978 = -1;
2337 charset_jisx0208 = -1;
d32320c4 2338 charset_ksc5601 = -1;
4ed46869
KH
2339}
2340
2341#ifdef emacs
2342
dfcf069d 2343void
971de7fb 2344syms_of_charset (void)
4ed46869 2345{
3263d5a2
KH
2346 DEFSYM (Qcharsetp, "charsetp");
2347
2348 DEFSYM (Qascii, "ascii");
2349 DEFSYM (Qunicode, "unicode");
6c652beb 2350 DEFSYM (Qemacs, "emacs");
2fe1edd1 2351 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2352 DEFSYM (Qiso_8859_1, "iso-8859-1");
2353
2354 DEFSYM (Qgl, "gl");
2355 DEFSYM (Qgr, "gr");
2356
3263d5a2
KH
2357 staticpro (&Vcharset_ordered_list);
2358 Vcharset_ordered_list = Qnil;
2359
2360 staticpro (&Viso_2022_charset_list);
2361 Viso_2022_charset_list = Qnil;
2362
2363 staticpro (&Vemacs_mule_charset_list);
2364 Vemacs_mule_charset_list = Qnil;
2365
3943ed76 2366 /* Don't staticpro them here. It's done in syms_of_fns. */
088dcc3e
DN
2367 QCtest = intern_c_string (":test");
2368 Qeq = intern_c_string ("eq");
3943ed76 2369
3263d5a2 2370 staticpro (&Vcharset_hash_table);
8f924df7
KH
2371 {
2372 Lisp_Object args[2];
2373 args[0] = QCtest;
2374 args[1] = Qeq;
2375 Vcharset_hash_table = Fmake_hash_table (2, args);
2376 }
3263d5a2
KH
2377
2378 charset_table_size = 128;
2379 charset_table = ((struct charset *)
2380 xmalloc (sizeof (struct charset) * charset_table_size));
2381 charset_table_used = 0;
2382
3263d5a2
KH
2383 defsubr (&Scharsetp);
2384 defsubr (&Smap_charset_chars);
2385 defsubr (&Sdefine_charset_internal);
2386 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2387 defsubr (&Scharset_plist);
2388 defsubr (&Sset_charset_plist);
2389 defsubr (&Sunify_charset);
3fac5a51 2390 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2391 defsubr (&Sdeclare_equiv_charset);
2392 defsubr (&Sfind_charset_region);
2393 defsubr (&Sfind_charset_string);
3263d5a2
KH
2394 defsubr (&Sdecode_char);
2395 defsubr (&Sencode_char);
4ed46869 2396 defsubr (&Ssplit_char);
3263d5a2 2397 defsubr (&Smake_char);
4ed46869 2398 defsubr (&Schar_charset);
90d7b74e 2399 defsubr (&Scharset_after);
4ed46869 2400 defsubr (&Siso_charset);
3263d5a2 2401 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2402 defsubr (&Scharset_priority_list);
2403 defsubr (&Sset_charset_priority);
d5b33309 2404 defsubr (&Scharset_id_internal);
7a84eee5 2405 defsubr (&Ssort_charsets);
3263d5a2 2406
4beef065 2407 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2408 doc: /* *List of directories to search for charset map files. */);
4beef065 2409 Vcharset_map_path = Qnil;
4ed46869 2410
ecca2aad
KH
2411 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2412 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2413 inhibit_load_charset_map = 0;
2414
4ed46869 2415 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2416 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2417 Vcharset_list = Qnil;
2418
6a9c90ec
KH
2419 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2420 doc: /* ISO639 language mnemonic symbol for the current language environment.
2421If the current language environment is for multiple languages (e.g. "Latin-1"),
2422the value may be a list of mnemonics. */);
2423 Vcurrent_iso639_language = Qnil;
2424
2fe1edd1
KH
2425 charset_ascii
2426 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2427 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2428 charset_iso_8859_1
2429 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2430 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2431 charset_unicode
73fbf2d9 2432 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2433 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2434 charset_emacs
2435 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2436 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2437 charset_eight_bit
2438 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2439 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2440 MAX_5_BYTE_CHAR + 1);
6809ca75 2441 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2442}
2443
2444#endif /* emacs */
cefd8c4f
KH
2445
2446/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2447 (do not change this comment) */