Fix decoding of emacs-mule coding system.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
114f9c96 3 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 5 2005, 2006, 2007, 2008, 2009, 2010
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
7a84eee5 31#include <stdlib.h>
3263d5a2
KH
32#include <unistd.h>
33#include <ctype.h>
4ed46869 34#include <sys/types.h>
d7306fe6 35#include <setjmp.h>
4ed46869 36#include "lisp.h"
3263d5a2 37#include "character.h"
4ed46869
KH
38#include "charset.h"
39#include "coding.h"
fc6b09bf 40#include "disptab.h"
3263d5a2 41#include "buffer.h"
4ed46869 42
04c2f2c5 43/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 44
3263d5a2 45 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 46 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
4ed46869 50
3263d5a2
KH
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
04c2f2c5 54 charset_table as a struct charset.
4ed46869 55
3263d5a2 56*/
4ed46869 57
3263d5a2
KH
58/* List of all charsets. This variable is used only from Emacs
59 Lisp. */
4ed46869 60Lisp_Object Vcharset_list;
4ed46869 61
3263d5a2
KH
62/* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64Lisp_Object Vcharset_hash_table;
4ed46869 65
3263d5a2
KH
66/* Table of struct charset. */
67struct charset *charset_table;
4ed46869 68
3263d5a2 69static int charset_table_size;
5af5dd92 70static int charset_table_used;
4ed46869 71
3263d5a2 72Lisp_Object Qcharsetp;
4ed46869 73
3263d5a2
KH
74/* Special charset symbols. */
75Lisp_Object Qascii;
2fe1edd1 76Lisp_Object Qeight_bit;
3263d5a2
KH
77Lisp_Object Qiso_8859_1;
78Lisp_Object Qunicode;
6c652beb 79Lisp_Object Qemacs;
4ed46869 80
3263d5a2
KH
81/* The corresponding charsets. */
82int charset_ascii;
2fe1edd1 83int charset_eight_bit;
3263d5a2
KH
84int charset_iso_8859_1;
85int charset_unicode;
6c652beb 86int charset_emacs;
b0e3cf2b 87
7c7dceee
KH
88/* The other special charsets. */
89int charset_jisx0201_roman;
90int charset_jisx0208_1978;
91int charset_jisx0208;
d32320c4 92int charset_ksc5601;
c1a08b4c 93
3263d5a2
KH
94/* Value of charset attribute `charset-iso-plane'. */
95Lisp_Object Qgl, Qgr;
c1a08b4c 96
d1a04588
KH
97/* Charset of unibyte characters. */
98int charset_unibyte;
4ed46869 99
3263d5a2
KH
100/* List of charsets ordered by the priority. */
101Lisp_Object Vcharset_ordered_list;
4ed46869 102
6a9c90ec
KH
103/* Sub-list of Vcharset_ordered_list that contains all non-preferred
104 charsets. */
105Lisp_Object Vcharset_non_preferred_head;
106
dbbb237d 107/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 108 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
109 -1. */
110unsigned short charset_ordered_list_tick;
4ed46869 111
3263d5a2
KH
112/* List of iso-2022 charsets. */
113Lisp_Object Viso_2022_charset_list;
35e623fb 114
3263d5a2
KH
115/* List of emacs-mule charsets. */
116Lisp_Object Vemacs_mule_charset_list;
117
b84ae584 118int emacs_mule_charset[256];
4ed46869
KH
119
120/* Mapping table from ISO2022's charset (specified by DIMENSION,
121 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
122int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
123
4beef065 124Lisp_Object Vcharset_map_path;
3263d5a2 125
ecca2aad
KH
126/* If nonzero, don't load charset maps. */
127int inhibit_load_charset_map;
3263d5a2 128
6a9c90ec
KH
129Lisp_Object Vcurrent_iso639_language;
130
64165ae2
DL
131/* Defined in chartab.c */
132extern void
133map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
134 Lisp_Object function, Lisp_Object table,
135 Lisp_Object arg, struct charset *charset,
136 unsigned from, unsigned to));
137
69f8de5b
KH
138#define CODE_POINT_TO_INDEX(charset, code) \
139 ((charset)->code_linear_p \
140 ? (code) - (charset)->min_code \
141 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
142 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
143 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
144 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
145 ? (((((code) >> 24) - (charset)->code_space[12]) \
146 * (charset)->code_space[11]) \
147 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
148 * (charset)->code_space[7]) \
149 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
150 * (charset)->code_space[3]) \
820ee249
KH
151 + (((code) & 0xFF) - (charset)->code_space[0]) \
152 - ((charset)->char_index_offset)) \
3263d5a2
KH
153 : -1)
154
155
156/* Convert the character index IDX to code-point CODE for CHARSET.
157 It is assumed that IDX is in a valid range. */
158
820ee249
KH
159#define INDEX_TO_CODE_POINT(charset, idx) \
160 ((charset)->code_linear_p \
161 ? (idx) + (charset)->min_code \
162 : (idx += (charset)->char_index_offset, \
163 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
164 | (((charset)->code_space[4] \
165 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
166 << 8) \
167 | (((charset)->code_space[8] \
168 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
169 << 16) \
170 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
171 << 24))))
4cf9710d 172
ecca2aad
KH
173/* Structure to hold mapping tables for a charset. Used by temacs
174 invoked for dumping. */
8a73a704 175
ecca2aad
KH
176static struct
177{
178 /* The current charset for which the following tables are setup. */
179 struct charset *current;
180
181 /* 1 iff the following table is used for encoder. */
182 short for_encoder;
183
184 /* When the following table is used for encoding, mininum and
185 maxinum character of the current charset. */
186 int min_char, max_char;
187
188 /* A Unicode character correspoinding to the code indice 0 (i.e. the
189 minimum code-point) of the current charset, or -1 if the code
190 indice 0 is not a Unicode character. This is checked when
191 table.encoder[CHAR] is zero. */
192 int zero_index_char;
193
194 union {
195 /* Table mapping code-indices (not code-points) of the current
196 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
197 doesn't belong to the current charset. */
198 int decoder[0x10000];
199 /* Table mapping Unicode characters to code-indices of the current
200 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
201 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
202 (0x20000..0x2FFFF). Note that there is no charset map that
203 uses both SMP and SIP. */
204 unsigned short encoder[0x20000];
205 } table;
206} *temp_charset_work;
207
208#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
209 do { \
210 if ((CODE) == 0) \
211 temp_charset_work->zero_index_char = (C); \
212 else if ((C) < 0x20000) \
213 temp_charset_work->table.encoder[(C)] = (CODE); \
214 else \
215 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
216 } while (0)
217
218#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
219 ((C) == temp_charset_work->zero_index_char ? 0 \
220 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
221 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
222 : temp_charset_work->table.encoder[(C) - 0x10000] \
223 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
224
225#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
226 (temp_charset_work->table.decoder[(CODE)] = (C))
227
228#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
229 (temp_charset_work->table.decoder[(CODE)])
046b1f03 230\f
93bcb785 231
e9ce014c
KH
232/* Set to 1 to warn that a charset map is loaded and thus a buffer
233 text and a string data may be relocated. */
3263d5a2 234int charset_map_loaded;
4ed46869 235
e9ce014c 236struct charset_map_entries
4ed46869 237{
e9ce014c
KH
238 struct {
239 unsigned from, to;
240 int c;
241 } entry[0x10000];
242 struct charset_map_entries *next;
243};
244
ecca2aad
KH
245/* Load the mapping information of CHARSET from ENTRIES for
246 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
247 encoding (CONTROL_FLAG == 2).
248
249 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
250 and CHARSET->fast_map.
251
252 If CONTROL_FLAG is 1, setup the following tables according to
253 CHARSET->method and inhibit_load_charset_map.
4cf9710d 254
ecca2aad
KH
255 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
258 ----------------------+--------------------+---------------------------
259 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 260
ecca2aad 261 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 262
ecca2aad
KH
263 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
264 ----------------------+--------------------+---------------------------
265 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
266 ----------------------+--------------------+--------------------------
267 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
268*/
4ed46869 269
3263d5a2 270static void
e9ce014c 271load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 272 struct charset *charset;
e9ce014c
KH
273 struct charset_map_entries *entries;
274 int n_entries;
3263d5a2 275 int control_flag;
4ed46869 276{
3263d5a2 277 Lisp_Object vec, table;
3263d5a2
KH
278 unsigned max_code = CHARSET_MAX_CODE (charset);
279 int ascii_compatible_p = charset->ascii_compatible_p;
280 int min_char, max_char, nonascii_min_char;
3263d5a2 281 int i;
3263d5a2 282 unsigned char *fast_map = charset->fast_map;
99529c2c 283
e9ce014c
KH
284 if (n_entries <= 0)
285 return;
286
ecca2aad 287 if (control_flag)
8ac5a9cc 288 {
ecca2aad
KH
289 if (! inhibit_load_charset_map)
290 {
291 if (control_flag == 1)
292 {
293 if (charset->method == CHARSET_METHOD_MAP)
294 {
295 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 296
ecca2aad
KH
297 vec = CHARSET_DECODER (charset)
298 = Fmake_vector (make_number (n), make_number (-1));
299 }
300 else
301 {
302 char_table_set_range (Vchar_unify_table,
303 charset->min_char, charset->max_char,
304 Qnil);
305 }
306 }
307 else
308 {
309 table = Fmake_char_table (Qnil, Qnil);
310 if (charset->method == CHARSET_METHOD_MAP)
311 CHARSET_ENCODER (charset) = table;
312 else
313 CHARSET_DEUNIFIER (charset) = table;
314 }
315 }
316 else
317 {
318 if (! temp_charset_work)
319 temp_charset_work = malloc (sizeof (*temp_charset_work));
320 if (control_flag == 1)
321 {
322 memset (temp_charset_work->table.decoder, -1,
323 sizeof (int) * 0x10000);
ecca2aad
KH
324 }
325 else
326 {
327 memset (temp_charset_work->table.encoder, 0,
328 sizeof (unsigned short) * 0x20000);
329 temp_charset_work->zero_index_char = -1;
330 }
331 temp_charset_work->current = charset;
332 temp_charset_work->for_encoder = (control_flag == 2);
333 control_flag += 2;
334 }
3263d5a2 335 charset_map_loaded = 1;
2e344af3 336 }
6662e69b 337
e9ce014c 338 min_char = max_char = entries->entry[0].c;
3263d5a2 339 nonascii_min_char = MAX_CHAR;
e9ce014c 340 for (i = 0; i < n_entries; i++)
2e344af3 341 {
e9ce014c 342 unsigned from, to;
3b4f4446
KH
343 int from_index, to_index;
344 int from_c, to_c;
e9ce014c 345 int idx = i % 0x10000;
3263d5a2 346
e9ce014c
KH
347 if (i > 0 && idx == 0)
348 entries = entries->next;
349 from = entries->entry[idx].from;
350 to = entries->entry[idx].to;
3b4f4446
KH
351 from_c = entries->entry[idx].c;
352 from_index = CODE_POINT_TO_INDEX (charset, from);
353 if (from == to)
6662e69b 354 {
3b4f4446
KH
355 to_index = from_index;
356 to_c = from_c;
6662e69b 357 }
3b4f4446 358 else
6662e69b 359 {
3b4f4446
KH
360 to_index = CODE_POINT_TO_INDEX (charset, to);
361 to_c = from_c + (to_index - from_index);
6662e69b 362 }
3b4f4446
KH
363 if (from_index < 0 || to_index < 0)
364 continue;
3263d5a2 365
ecca2aad
KH
366 if (to_c > max_char)
367 max_char = to_c;
368 else if (from_c < min_char)
369 min_char = from_c;
6662e69b 370
ecca2aad
KH
371 if (control_flag == 1)
372 {
373 if (charset->method == CHARSET_METHOD_MAP)
374 for (; from_index <= to_index; from_index++, from_c++)
375 ASET (vec, from_index, make_number (from_c));
376 else
377 for (; from_index <= to_index; from_index++, from_c++)
378 CHAR_TABLE_SET (Vchar_unify_table,
379 CHARSET_CODE_OFFSET (charset) + from_index,
380 make_number (from_c));
381 }
382 else if (control_flag == 2)
383 {
384 if (charset->method == CHARSET_METHOD_MAP
385 && CHARSET_COMPACT_CODES_P (charset))
386 for (; from_index <= to_index; from_index++, from_c++)
387 {
388 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
389
390 if (NILP (CHAR_TABLE_REF (table, from_c)))
391 CHAR_TABLE_SET (table, from_c, make_number (code));
392 }
393 else
394 for (; from_index <= to_index; from_index++, from_c++)
395 {
396 if (NILP (CHAR_TABLE_REF (table, from_c)))
397 CHAR_TABLE_SET (table, from_c, make_number (from_index));
398 }
399 }
400 else if (control_flag == 3)
401 for (; from_index <= to_index; from_index++, from_c++)
402 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
403 else if (control_flag == 4)
404 for (; from_index <= to_index; from_index++, from_c++)
405 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
406 else /* control_flag == 0 */
407 {
3b4f4446
KH
408 if (ascii_compatible_p)
409 {
410 if (! ASCII_BYTE_P (from_c))
411 {
412 if (from_c < nonascii_min_char)
413 nonascii_min_char = from_c;
414 }
415 else if (! ASCII_BYTE_P (to_c))
416 {
417 nonascii_min_char = 0x80;
418 }
419 }
177c0ea7 420
ecca2aad
KH
421 for (; from_c <= to_c; from_c++)
422 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 423 }
8ac5a9cc 424 }
3263d5a2 425
ecca2aad 426 if (control_flag == 0)
4ed46869 427 {
3263d5a2
KH
428 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
429 ? nonascii_min_char : min_char);
430 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 431 }
ecca2aad
KH
432 else if (control_flag == 4)
433 {
434 temp_charset_work->min_char = min_char;
435 temp_charset_work->max_char = max_char;
436 }
4ed46869
KH
437}
438
12bcae05 439
3263d5a2
KH
440/* Read a hexadecimal number (preceded by "0x") from the file FP while
441 paying attention to comment charcter '#'. */
12bcae05 442
3263d5a2
KH
443static INLINE unsigned
444read_hex (fp, eof)
445 FILE *fp;
446 int *eof;
12bcae05 447{
3263d5a2
KH
448 int c;
449 unsigned n;
12bcae05 450
3263d5a2
KH
451 while ((c = getc (fp)) != EOF)
452 {
69f8de5b 453 if (c == '#')
3263d5a2
KH
454 {
455 while ((c = getc (fp)) != EOF && c != '\n');
456 }
457 else if (c == '0')
458 {
459 if ((c = getc (fp)) == EOF || c == 'x')
460 break;
461 }
8f924df7 462 }
3263d5a2
KH
463 if (c == EOF)
464 {
465 *eof = 1;
466 return 0;
467 }
468 *eof = 0;
469 n = 0;
470 if (c == 'x')
471 while ((c = getc (fp)) != EOF && isxdigit (c))
472 n = ((n << 4)
473 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
474 else
475 while ((c = getc (fp)) != EOF && isdigit (c))
476 n = (n * 10) + c - '0';
e9ce014c
KH
477 if (c != EOF)
478 ungetc (c, fp);
3263d5a2
KH
479 return n;
480}
12bcae05 481
dde2559c 482extern Lisp_Object Qfile_name_handler_alist;
537efd8d 483
3263d5a2 484/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
485 Each line of MAPFILE has this form
486 0xAAAA 0xCCCC
487 where 0xAAAA is a code-point and 0xCCCC is the corresponding
488 character code, or this form
489 0xAAAA-0xBBBB 0xCCCC
490 where 0xAAAA and 0xBBBB are code-points specifying a range, and
491 0xCCCC is the first character code of the range.
4ed46869 492
3263d5a2
KH
493 The returned vector has this form:
494 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 495 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
496 range.
497
18a10a21
JB
498 Note that this function uses `openp' to open MAPFILE but ignores
499 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 500
c449997d 501extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
4ed46869 502
e9ce014c
KH
503static void
504load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
505 struct charset *charset;
506 Lisp_Object mapfile;
e9ce014c 507 int control_flag;
4ed46869 508{
e9ce014c
KH
509 unsigned min_code = CHARSET_MIN_CODE (charset);
510 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
511 int fd;
512 FILE *fp;
3263d5a2
KH
513 int eof;
514 Lisp_Object suffixes;
e9ce014c 515 struct charset_map_entries *head, *entries;
a2f3eb19
CY
516 int n_entries, count;
517 USE_SAFE_ALLOCA;
4ed46869 518
3263d5a2
KH
519 suffixes = Fcons (build_string (".map"),
520 Fcons (build_string (".TXT"), Qnil));
4ed46869 521
a2f3eb19 522 count = SPECPDL_INDEX ();
dde2559c 523 specbind (Qfile_name_handler_alist, Qnil);
4beef065 524 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 525 unbind_to (count, Qnil);
3263d5a2
KH
526 if (fd < 0
527 || ! (fp = fdopen (fd, "r")))
8458d4c1 528 error ("Failure in loading charset map: %S", SDATA (mapfile));
4ed46869 529
a2f3eb19
CY
530 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
531 large (larger than MAX_ALLOCA). */
532 SAFE_ALLOCA (head, struct charset_map_entries *,
533 sizeof (struct charset_map_entries));
534 entries = head;
412c01b6 535 bzero (entries, sizeof (struct charset_map_entries));
a2f3eb19 536
e9ce014c 537 n_entries = 0;
3263d5a2
KH
538 eof = 0;
539 while (1)
540 {
e9ce014c
KH
541 unsigned from, to;
542 int c;
543 int idx;
4ed46869 544
e9ce014c 545 from = read_hex (fp, &eof);
3263d5a2
KH
546 if (eof)
547 break;
e9ce014c
KH
548 if (getc (fp) == '-')
549 to = read_hex (fp, &eof);
550 else
551 to = from;
552 c = (int) read_hex (fp, &eof);
ac4137cc 553
e9ce014c
KH
554 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
555 continue;
ac4137cc 556
e9ce014c 557 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 558 {
a2f3eb19
CY
559 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
560 sizeof (struct charset_map_entries));
e9ce014c 561 entries = entries->next;
412c01b6 562 bzero (entries, sizeof (struct charset_map_entries));
3263d5a2 563 }
e9ce014c
KH
564 idx = n_entries % 0x10000;
565 entries->entry[idx].from = from;
566 entries->entry[idx].to = to;
567 entries->entry[idx].c = c;
568 n_entries++;
3263d5a2
KH
569 }
570 fclose (fp);
571 close (fd);
177c0ea7 572
e9ce014c 573 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 574 SAFE_FREE ();
4ed46869
KH
575}
576
e9ce014c
KH
577static void
578load_charset_map_from_vector (charset, vec, control_flag)
579 struct charset *charset;
580 Lisp_Object vec;
581 int control_flag;
23d2a7f1 582{
e9ce014c
KH
583 unsigned min_code = CHARSET_MIN_CODE (charset);
584 unsigned max_code = CHARSET_MAX_CODE (charset);
585 struct charset_map_entries *head, *entries;
586 int n_entries;
587 int len = ASIZE (vec);
588 int i;
a2f3eb19 589 USE_SAFE_ALLOCA;
23d2a7f1 590
e9ce014c 591 if (len % 2 == 1)
3263d5a2 592 {
e9ce014c
KH
593 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
594 return;
3263d5a2 595 }
35e623fb 596
a2f3eb19
CY
597 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
598 large (larger than MAX_ALLOCA). */
599 SAFE_ALLOCA (head, struct charset_map_entries *,
600 sizeof (struct charset_map_entries));
601 entries = head;
412c01b6 602 bzero (entries, sizeof (struct charset_map_entries));
a2f3eb19 603
e9ce014c
KH
604 n_entries = 0;
605 for (i = 0; i < len; i += 2)
35e623fb 606 {
e9ce014c
KH
607 Lisp_Object val, val2;
608 unsigned from, to;
609 int c;
610 int idx;
d2665018 611
e9ce014c
KH
612 val = AREF (vec, i);
613 if (CONSP (val))
bbf12bb3 614 {
e9ce014c
KH
615 val2 = XCDR (val);
616 val = XCAR (val);
617 CHECK_NATNUM (val);
618 CHECK_NATNUM (val2);
619 from = XFASTINT (val);
620 to = XFASTINT (val2);
bbf12bb3 621 }
e9ce014c 622 else
bbf12bb3 623 {
e9ce014c
KH
624 CHECK_NATNUM (val);
625 from = to = XFASTINT (val);
bbf12bb3 626 }
e9ce014c
KH
627 val = AREF (vec, i + 1);
628 CHECK_NATNUM (val);
629 c = XFASTINT (val);
76d7b829 630
e9ce014c
KH
631 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
632 continue;
76d7b829 633
dcc694d7 634 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 635 {
a2f3eb19
CY
636 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
637 sizeof (struct charset_map_entries));
e9ce014c 638 entries = entries->next;
412c01b6 639 bzero (entries, sizeof (struct charset_map_entries));
e9ce014c
KH
640 }
641 idx = n_entries % 0x10000;
642 entries->entry[idx].from = from;
643 entries->entry[idx].to = to;
644 entries->entry[idx].c = c;
645 n_entries++;
646 }
76d7b829 647
e9ce014c 648 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 649 SAFE_FREE ();
ac4137cc
KH
650}
651
ecca2aad
KH
652
653/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
654 map it is (see the comment of load_charset_map for the detail). */
655
3263d5a2 656static void
ecca2aad 657load_charset (charset, control_flag)
3263d5a2 658 struct charset *charset;
ecca2aad 659 int control_flag;
76d7b829 660{
ecca2aad 661 Lisp_Object map;
76d7b829 662
ecca2aad
KH
663 if (inhibit_load_charset_map
664 && temp_charset_work
665 && charset == temp_charset_work->current
56f00ed2 666 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
667 return;
668
669 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
670 map = CHARSET_MAP (charset);
671 else if (CHARSET_UNIFIED_P (charset))
672 map = CHARSET_UNIFY_MAP (charset);
673 if (STRINGP (map))
674 load_charset_map_from_file (charset, map, control_flag);
675 else
676 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 677}
76d7b829 678
3263d5a2
KH
679
680DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
681 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
682 (object)
683 Lisp_Object object;
23d2a7f1 684{
3263d5a2 685 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
686}
687
4ed46869 688
ecca2aad
KH
689void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
690 Lisp_Object function, Lisp_Object arg,
691 unsigned from, unsigned to));
692
693void
694map_charset_for_dump (c_function, function, arg, from, to)
695 void (*c_function) (Lisp_Object, Lisp_Object);
696 Lisp_Object function, arg;
697 unsigned from, to;
698{
699 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
700 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
701 Lisp_Object range;
702 int c, stop;
703 struct gcpro gcpro1;
704
705 range = Fcons (Qnil, Qnil);
706 GCPRO1 (range);
707
708 c = temp_charset_work->min_char;
709 stop = (temp_charset_work->max_char < 0x20000
710 ? temp_charset_work->max_char : 0xFFFF);
711
712 while (1)
713 {
714 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
715
716 if (index >= from_idx && index <= to_idx)
717 {
718 if (NILP (XCAR (range)))
719 XSETCAR (range, make_number (c));
720 }
721 else if (! NILP (XCAR (range)))
722 {
723 XSETCDR (range, make_number (c - 1));
724 if (c_function)
725 (*c_function) (arg, range);
726 else
727 call2 (function, range, arg);
728 XSETCAR (range, Qnil);
729 }
730 if (c == stop)
731 {
732 if (c == temp_charset_work->max_char)
733 {
734 if (! NILP (XCAR (range)))
735 {
736 XSETCDR (range, make_number (c));
737 if (c_function)
738 (*c_function) (arg, range);
739 else
740 call2 (function, range, arg);
741 }
742 break;
743 }
744 c = 0x1FFFF;
745 stop = temp_charset_work->max_char;
746 }
747 c++;
748 }
c542407d 749 UNGCPRO;
ecca2aad
KH
750}
751
4ed46869 752void
374c5cfd
KH
753map_charset_chars (c_function, function, arg,
754 charset, from, to)
755 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
756 Lisp_Object function, arg;
757 struct charset *charset;
758 unsigned from, to;
4ed46869 759{
3263d5a2 760 Lisp_Object range;
374c5cfd 761 int partial;
3263d5a2 762
374c5cfd
KH
763 partial = (from > CHARSET_MIN_CODE (charset)
764 || to < CHARSET_MAX_CODE (charset));
765
3263d5a2 766 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 767 {
374c5cfd
KH
768 int from_idx = CODE_POINT_TO_INDEX (charset, from);
769 int to_idx = CODE_POINT_TO_INDEX (charset, to);
770 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
771 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
772
ecca2aad
KH
773 if (CHARSET_UNIFIED_P (charset))
774 {
775 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
776 load_charset (charset, 2);
777 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
778 map_char_table_for_charset (c_function, function,
779 CHARSET_DEUNIFIER (charset), arg,
780 partial ? charset : NULL, from, to);
781 else
782 map_charset_for_dump (c_function, function, arg, from, to);
783 }
784
374c5cfd 785 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 786 if (NILP (function))
5af5dd92 787 (*c_function) (arg, range);
3263d5a2
KH
788 else
789 call2 (function, range, arg);
c83ef371 790 }
3263d5a2
KH
791 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
792 {
793 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
794 load_charset (charset, 2);
795 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
796 map_char_table_for_charset (c_function, function,
797 CHARSET_ENCODER (charset), arg,
798 partial ? charset : NULL, from, to);
799 else
800 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 801 }
374c5cfd 802 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 803 {
374c5cfd
KH
804 Lisp_Object subset_info;
805 int offset;
806
807 subset_info = CHARSET_SUBSET (charset);
808 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
809 offset = XINT (AREF (subset_info, 3));
810 from -= offset;
811 if (from < XFASTINT (AREF (subset_info, 1)))
812 from = XFASTINT (AREF (subset_info, 1));
813 to -= offset;
814 if (to > XFASTINT (AREF (subset_info, 2)))
815 to = XFASTINT (AREF (subset_info, 2));
816 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 817 }
374c5cfd
KH
818 else /* i.e. CHARSET_METHOD_SUPERSET */
819 {
820 Lisp_Object parents;
4ed46869 821
374c5cfd
KH
822 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
823 parents = XCDR (parents))
bbf12bb3 824 {
374c5cfd
KH
825 int offset;
826 unsigned this_from, this_to;
827
828 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
829 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
830 this_from = from > offset ? from - offset : 0;
831 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
832 if (this_from < CHARSET_MIN_CODE (charset))
833 this_from = CHARSET_MIN_CODE (charset);
834 if (this_to > CHARSET_MAX_CODE (charset))
835 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
836 map_charset_chars (c_function, function, arg, charset,
837 this_from, this_to);
bbf12bb3 838 }
35e623fb 839 }
4ed46869
KH
840}
841
374c5cfd 842DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 843 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 844FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 845argument ARG.
4ed46869 846
374c5cfd
KH
847RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
848characters contained in CHARSET.
4ed46869 849
374c5cfd 850The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 851range of code points (in CHARSET) of target characters. */)
374c5cfd
KH
852 (function, charset, arg, from_code, to_code)
853 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 854{
374c5cfd 855 struct charset *cs;
16fed1fc 856 unsigned from, to;
4ed46869 857
374c5cfd
KH
858 CHECK_CHARSET_GET_CHARSET (charset, cs);
859 if (NILP (from_code))
16fed1fc 860 from = CHARSET_MIN_CODE (cs);
970b7474 861 else
4ed46869 862 {
970b7474
KH
863 CHECK_NATNUM (from_code);
864 from = XINT (from_code);
865 if (from < CHARSET_MIN_CODE (cs))
866 from = CHARSET_MIN_CODE (cs);
4ed46869 867 }
374c5cfd 868 if (NILP (to_code))
970b7474 869 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
870 else
871 {
970b7474
KH
872 CHECK_NATNUM (to_code);
873 to = XINT (to_code);
874 if (to > CHARSET_MAX_CODE (cs))
875 to = CHARSET_MAX_CODE (cs);
4ed46869 876 }
16fed1fc 877 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 878 return Qnil;
35e623fb 879}
4ed46869 880
4ed46869 881
3263d5a2
KH
882/* Define a charset according to the arguments. The Nth argument is
883 the Nth attribute of the charset (the last attribute `charset-id'
884 is not included). See the docstring of `define-charset' for the
885 detail. */
4ed46869 886
3263d5a2
KH
887DEFUN ("define-charset-internal", Fdefine_charset_internal,
888 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
889 doc: /* For internal use only.
890usage: (define-charset-internal ...) */)
3263d5a2
KH
891 (nargs, args)
892 int nargs;
893 Lisp_Object *args;
4ed46869 894{
3263d5a2
KH
895 /* Charset attr vector. */
896 Lisp_Object attrs;
897 Lisp_Object val;
898 unsigned hash_code;
899 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 900 int i, j;
3263d5a2
KH
901 struct charset charset;
902 int id;
903 int dimension;
904 int new_definition_p;
905 int nchars;
906
907 if (nargs != charset_arg_max)
908 return Fsignal (Qwrong_number_of_arguments,
909 Fcons (intern ("define-charset-internal"),
910 make_number (nargs)));
911
912 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
913
914 CHECK_SYMBOL (args[charset_arg_name]);
915 ASET (attrs, charset_name, args[charset_arg_name]);
916
917 val = args[charset_arg_code_space];
918 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 919 {
3263d5a2
KH
920 int min_byte, max_byte;
921
922 min_byte = XINT (Faref (val, make_number (i * 2)));
923 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
924 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
925 error ("Invalid :code-space value");
926 charset.code_space[i * 4] = min_byte;
927 charset.code_space[i * 4 + 1] = max_byte;
928 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
929 nchars *= charset.code_space[i * 4 + 2];
930 charset.code_space[i * 4 + 3] = nchars;
931 if (max_byte > 0)
932 dimension = i + 1;
933 }
4ed46869 934
3263d5a2
KH
935 val = args[charset_arg_dimension];
936 if (NILP (val))
937 charset.dimension = dimension;
938 else
4ed46869 939 {
3263d5a2
KH
940 CHECK_NATNUM (val);
941 charset.dimension = XINT (val);
942 if (charset.dimension < 1 || charset.dimension > 4)
943 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
944 }
945
3263d5a2
KH
946 charset.code_linear_p
947 = (charset.dimension == 1
948 || (charset.code_space[2] == 256
949 && (charset.dimension == 2
950 || (charset.code_space[6] == 256
951 && (charset.dimension == 3
952 || charset.code_space[10] == 256)))));
953
69f8de5b 954 if (! charset.code_linear_p)
4ed46869 955 {
69f8de5b 956 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 957 bzero (charset.code_space_mask, 256);
69f8de5b
KH
958 for (i = 0; i < 4; i++)
959 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
960 j++)
961 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
962 }
963
3263d5a2 964 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 965
3263d5a2
KH
966 charset.min_code = (charset.code_space[0]
967 | (charset.code_space[4] << 8)
968 | (charset.code_space[8] << 16)
969 | (charset.code_space[12] << 24));
970 charset.max_code = (charset.code_space[1]
971 | (charset.code_space[5] << 8)
972 | (charset.code_space[9] << 16)
973 | (charset.code_space[13] << 24));
820ee249 974 charset.char_index_offset = 0;
8a73a704 975
820ee249
KH
976 val = args[charset_arg_min_code];
977 if (! NILP (val))
978 {
979 unsigned code;
fdb82f93 980
820ee249
KH
981 if (INTEGERP (val))
982 code = XINT (val);
983 else
984 {
985 CHECK_CONS (val);
8f924df7
KH
986 CHECK_NUMBER_CAR (val);
987 CHECK_NUMBER_CDR (val);
820ee249
KH
988 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
989 }
990 if (code < charset.min_code
991 || code > charset.max_code)
992 args_out_of_range_3 (make_number (charset.min_code),
993 make_number (charset.max_code), val);
994 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
995 charset.min_code = code;
996 }
3fac5a51 997
820ee249
KH
998 val = args[charset_arg_max_code];
999 if (! NILP (val))
3fac5a51 1000 {
820ee249
KH
1001 unsigned code;
1002
1003 if (INTEGERP (val))
1004 code = XINT (val);
1005 else
1006 {
1007 CHECK_CONS (val);
8f924df7
KH
1008 CHECK_NUMBER_CAR (val);
1009 CHECK_NUMBER_CDR (val);
820ee249
KH
1010 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1011 }
1012 if (code < charset.min_code
1013 || code > charset.max_code)
1014 args_out_of_range_3 (make_number (charset.min_code),
1015 make_number (charset.max_code), val);
1016 charset.max_code = code;
3fac5a51 1017 }
3fac5a51 1018
ecca2aad 1019 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 1020
3263d5a2
KH
1021 val = args[charset_arg_invalid_code];
1022 if (NILP (val))
1023 {
1024 if (charset.min_code > 0)
1025 charset.invalid_code = 0;
bbf12bb3
KH
1026 else
1027 {
3263d5a2
KH
1028 XSETINT (val, charset.max_code + 1);
1029 if (XINT (val) == charset.max_code + 1)
1030 charset.invalid_code = charset.max_code + 1;
1031 else
1032 error ("Attribute :invalid-code must be specified");
76d7b829 1033 }
76d7b829 1034 }
3263d5a2
KH
1035 else
1036 {
1037 CHECK_NATNUM (val);
1038 charset.invalid_code = XFASTINT (val);
1039 }
4ed46869 1040
3263d5a2
KH
1041 val = args[charset_arg_iso_final];
1042 if (NILP (val))
1043 charset.iso_final = -1;
1044 else
1045 {
1046 CHECK_NUMBER (val);
1047 if (XINT (val) < '0' || XINT (val) > 127)
1048 error ("Invalid iso-final-char: %d", XINT (val));
1049 charset.iso_final = XINT (val);
1050 }
4ed46869 1051
3263d5a2
KH
1052 val = args[charset_arg_iso_revision];
1053 if (NILP (val))
1054 charset.iso_revision = -1;
1055 else
4ed46869 1056 {
3263d5a2
KH
1057 CHECK_NUMBER (val);
1058 if (XINT (val) > 63)
1059 args_out_of_range (make_number (63), val);
1060 charset.iso_revision = XINT (val);
4ed46869 1061 }
4ed46869 1062
3263d5a2
KH
1063 val = args[charset_arg_emacs_mule_id];
1064 if (NILP (val))
1065 charset.emacs_mule_id = -1;
4ed46869
KH
1066 else
1067 {
3263d5a2
KH
1068 CHECK_NATNUM (val);
1069 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1070 error ("Invalid emacs-mule-id: %d", XINT (val));
1071 charset.emacs_mule_id = XINT (val);
c83ef371 1072 }
f6302ac9 1073
3263d5a2 1074 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1075
3263d5a2 1076 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1077
3263d5a2
KH
1078 charset.unified_p = 0;
1079
1080 bzero (charset.fast_map, sizeof (charset.fast_map));
1081
1082 if (! NILP (args[charset_arg_code_offset]))
1083 {
1084 val = args[charset_arg_code_offset];
1085 CHECK_NUMBER (val);
1086
1087 charset.method = CHARSET_METHOD_OFFSET;
1088 charset.code_offset = XINT (val);
1089
1090 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1091 charset.min_char = i + charset.code_offset;
1092 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1093 charset.max_char = i + charset.code_offset;
1094 if (charset.max_char > MAX_CHAR)
1095 error ("Unsupported max char: %d", charset.max_char);
1096
f148205f
KH
1097 i = (charset.min_char >> 7) << 7;
1098 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1099 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1100 i = (i >> 12) << 12;
3263d5a2
KH
1101 for (; i <= charset.max_char; i += 0x1000)
1102 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1103 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1104 charset.ascii_compatible_p = 1;
3263d5a2
KH
1105 }
1106 else if (! NILP (args[charset_arg_map]))
1107 {
1108 val = args[charset_arg_map];
1109 ASET (attrs, charset_map, val);
ecca2aad 1110 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1111 }
374c5cfd 1112 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1113 {
374c5cfd
KH
1114 Lisp_Object parent;
1115 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1116 struct charset *parent_charset;
1117
1118 val = args[charset_arg_subset];
1119 parent = Fcar (val);
1120 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1121 parent_min_code = Fnth (make_number (1), val);
1122 CHECK_NATNUM (parent_min_code);
1123 parent_max_code = Fnth (make_number (2), val);
1124 CHECK_NATNUM (parent_max_code);
1125 parent_code_offset = Fnth (make_number (3), val);
1126 CHECK_NUMBER (parent_code_offset);
1127 val = Fmake_vector (make_number (4), Qnil);
1128 ASET (val, 0, make_number (parent_charset->id));
1129 ASET (val, 1, parent_min_code);
1130 ASET (val, 2, parent_max_code);
1131 ASET (val, 3, parent_code_offset);
1132 ASET (attrs, charset_subset, val);
1133
1134 charset.method = CHARSET_METHOD_SUBSET;
1135 /* Here, we just copy the parent's fast_map. It's not accurate,
1136 but at least it works for quickly detecting which character
1137 DOESN'T belong to this charset. */
1138 for (i = 0; i < 190; i++)
1139 charset.fast_map[i] = parent_charset->fast_map[i];
1140
1141 /* We also copy these for parents. */
1142 charset.min_char = parent_charset->min_char;
1143 charset.max_char = parent_charset->max_char;
1144 }
1145 else if (! NILP (args[charset_arg_superset]))
0282eb69 1146 {
374c5cfd
KH
1147 val = args[charset_arg_superset];
1148 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1149 val = Fcopy_sequence (val);
374c5cfd 1150 ASET (attrs, charset_superset, val);
3263d5a2
KH
1151
1152 charset.min_char = MAX_CHAR;
1153 charset.max_char = 0;
1154 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1155 {
3263d5a2
KH
1156 Lisp_Object elt, car_part, cdr_part;
1157 int this_id, offset;
1158 struct charset *this_charset;
2e344af3 1159
3263d5a2
KH
1160 elt = Fcar (val);
1161 if (CONSP (elt))
2e344af3 1162 {
3263d5a2
KH
1163 car_part = XCAR (elt);
1164 cdr_part = XCDR (elt);
1165 CHECK_CHARSET_GET_ID (car_part, this_id);
1166 CHECK_NUMBER (cdr_part);
1167 offset = XINT (cdr_part);
177c0ea7 1168 }
3263d5a2 1169 else
4ed46869 1170 {
3263d5a2
KH
1171 CHECK_CHARSET_GET_ID (elt, this_id);
1172 offset = 0;
4ed46869 1173 }
3263d5a2
KH
1174 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1175
1176 this_charset = CHARSET_FROM_ID (this_id);
1177 if (charset.min_char > this_charset->min_char)
1178 charset.min_char = this_charset->min_char;
1179 if (charset.max_char < this_charset->max_char)
1180 charset.max_char = this_charset->max_char;
1181 for (i = 0; i < 190; i++)
1182 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1183 }
0282eb69 1184 }
2e344af3 1185 else
3263d5a2 1186 error ("None of :code-offset, :map, :parents are specified");
05505664 1187
3263d5a2
KH
1188 val = args[charset_arg_unify_map];
1189 if (! NILP (val) && !STRINGP (val))
1190 CHECK_VECTOR (val);
1191 ASET (attrs, charset_unify_map, val);
05505664 1192
3263d5a2
KH
1193 CHECK_LIST (args[charset_arg_plist]);
1194 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1195
3263d5a2
KH
1196 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1197 &hash_code);
1198 if (charset.hash_index >= 0)
1199 {
1200 new_definition_p = 0;
4f65af01 1201 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1202 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1203 }
1a45ff10 1204 else
3263d5a2
KH
1205 {
1206 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1207 hash_code);
1208 if (charset_table_used == charset_table_size)
1209 {
2fe1edd1
KH
1210 struct charset *new_table
1211 = (struct charset *) xmalloc (sizeof (struct charset)
1212 * (charset_table_size + 16));
1213 bcopy (charset_table, new_table,
1214 sizeof (struct charset) * charset_table_size);
1215 charset_table_size += 16;
1216 charset_table = new_table;
3263d5a2
KH
1217 }
1218 id = charset_table_used++;
3263d5a2
KH
1219 new_definition_p = 1;
1220 }
2e344af3 1221
4f65af01 1222 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1223 charset.id = id;
1224 charset_table[id] = charset;
2e344af3 1225
ecca2aad 1226 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1227 {
1228 load_charset (&charset, 0);
1229 charset_table[id] = charset;
1230 }
ecca2aad 1231
3263d5a2 1232 if (charset.iso_final >= 0)
4ed46869 1233 {
3263d5a2
KH
1234 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1235 charset.iso_final) = id;
1236 if (new_definition_p)
1237 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1238 Fcons (make_number (id), Qnil));
7c7dceee
KH
1239 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1240 charset_jisx0201_roman = id;
1241 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1242 charset_jisx0208_1978 = id;
1243 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1244 charset_jisx0208 = id;
d32320c4
KH
1245 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1246 charset_ksc5601 = id;
4ed46869 1247 }
d0cf2d48 1248
3263d5a2 1249 if (charset.emacs_mule_id >= 0)
4ed46869 1250 {
b84ae584 1251 emacs_mule_charset[charset.emacs_mule_id] = id;
4f65af01
KH
1252 if (charset.emacs_mule_id < 0xA0)
1253 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1254 else
1255 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1256 if (new_definition_p)
1257 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1258 Fcons (make_number (id), Qnil));
4ed46869
KH
1259 }
1260
3263d5a2
KH
1261 if (new_definition_p)
1262 {
1263 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1264 if (charset.supplementary_p)
1265 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1266 Fcons (make_number (id), Qnil));
1267 else
880820fe
KH
1268 {
1269 Lisp_Object tail;
1270
1271 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1272 {
1273 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1274
1275 if (cs->supplementary_p)
1276 break;
1277 }
1278 if (EQ (tail, Vcharset_ordered_list))
1279 Vcharset_ordered_list = Fcons (make_number (id),
1280 Vcharset_ordered_list);
1281 else if (NILP (tail))
1282 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1283 Fcons (make_number (id), Qnil));
1284 else
1285 {
1286 val = Fcons (XCAR (tail), XCDR (tail));
1287 XSETCDR (tail, val);
1288 XSETCAR (tail, make_number (id));
1289 }
1290 }
dbbb237d 1291 charset_ordered_list_tick++;
3263d5a2 1292 }
4ed46869 1293
3263d5a2 1294 return Qnil;
4ed46869
KH
1295}
1296
2fe1edd1
KH
1297
1298/* Same as Fdefine_charset_internal but arguments are more convenient
1299 to call from C (typically in syms_of_charset). This can define a
1300 charset of `offset' method only. Return the ID of the new
1301 charset. */
1302
1303static int
1304define_charset_internal (name, dimension, code_space, min_code, max_code,
1305 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1306 ascii_compatible, supplementary,
2fe1edd1
KH
1307 code_offset)
1308 Lisp_Object name;
1309 int dimension;
1310 unsigned char *code_space;
1311 unsigned min_code, max_code;
1312 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1313 int ascii_compatible, supplementary;
2fe1edd1
KH
1314 int code_offset;
1315{
1316 Lisp_Object args[charset_arg_max];
1317 Lisp_Object plist[14];
1318 Lisp_Object val;
1319 int i;
1320
1321 args[charset_arg_name] = name;
1322 args[charset_arg_dimension] = make_number (dimension);
1323 val = Fmake_vector (make_number (8), make_number (0));
1324 for (i = 0; i < 8; i++)
1325 ASET (val, i, make_number (code_space[i]));
1326 args[charset_arg_code_space] = val;
1327 args[charset_arg_min_code] = make_number (min_code);
1328 args[charset_arg_max_code] = make_number (max_code);
1329 args[charset_arg_iso_final]
1330 = (iso_final < 0 ? Qnil : make_number (iso_final));
1331 args[charset_arg_iso_revision] = make_number (iso_revision);
1332 args[charset_arg_emacs_mule_id]
1333 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1334 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1335 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1336 args[charset_arg_invalid_code] = Qnil;
1337 args[charset_arg_code_offset] = make_number (code_offset);
1338 args[charset_arg_map] = Qnil;
1339 args[charset_arg_subset] = Qnil;
1340 args[charset_arg_superset] = Qnil;
1341 args[charset_arg_unify_map] = Qnil;
1342
d67b4f80 1343 plist[0] = intern_c_string (":name");
2fe1edd1 1344 plist[1] = args[charset_arg_name];
d67b4f80 1345 plist[2] = intern_c_string (":dimension");
2fe1edd1 1346 plist[3] = args[charset_arg_dimension];
d67b4f80 1347 plist[4] = intern_c_string (":code-space");
2fe1edd1 1348 plist[5] = args[charset_arg_code_space];
d67b4f80 1349 plist[6] = intern_c_string (":iso-final-char");
2fe1edd1 1350 plist[7] = args[charset_arg_iso_final];
d67b4f80 1351 plist[8] = intern_c_string (":emacs-mule-id");
2fe1edd1 1352 plist[9] = args[charset_arg_emacs_mule_id];
d67b4f80 1353 plist[10] = intern_c_string (":ascii-compatible-p");
2fe1edd1 1354 plist[11] = args[charset_arg_ascii_compatible_p];
d67b4f80 1355 plist[12] = intern_c_string (":code-offset");
2fe1edd1
KH
1356 plist[13] = args[charset_arg_code_offset];
1357
1358 args[charset_arg_plist] = Flist (14, plist);
1359 Fdefine_charset_internal (charset_arg_max, args);
1360
1361 return XINT (CHARSET_SYMBOL_ID (name));
1362}
1363
1364
3263d5a2
KH
1365DEFUN ("define-charset-alias", Fdefine_charset_alias,
1366 Sdefine_charset_alias, 2, 2, 0,
1367 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1368 (alias, charset)
1369 Lisp_Object alias, charset;
4ed46869 1370{
3263d5a2
KH
1371 Lisp_Object attr;
1372
1373 CHECK_CHARSET_GET_ATTR (charset, attr);
1374 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1375 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1376 return Qnil;
1377}
4ed46869 1378
4ed46869 1379
3263d5a2 1380DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1381 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1382 (charset)
1383 Lisp_Object charset;
1384{
1385 Lisp_Object attrs;
1386
1387 CHECK_CHARSET_GET_ATTR (charset, attrs);
1388 return CHARSET_ATTR_PLIST (attrs);
1389}
1390
1391
1392DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1393 doc: /* Set CHARSET's property list to PLIST. */)
1394 (charset, plist)
1395 Lisp_Object charset, plist;
1396{
1397 Lisp_Object attrs;
1398
1399 CHECK_CHARSET_GET_ATTR (charset, attrs);
1400 CHARSET_ATTR_PLIST (attrs) = plist;
1401 return plist;
1402}
1403
1404
dbbb237d 1405DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1406 doc: /* Unify characters of CHARSET with Unicode.
1407This means reading the relevant file and installing the table defined
dbbb237d
KH
1408by CHARSET's `:unify-map' property.
1409
64165ae2
DL
1410Optional second arg UNIFY-MAP is a file name string or a vector. It has
1411the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1412`define-charset' (which see).
1413
1414Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1415 (charset, unify_map, deunify)
1416 Lisp_Object charset, unify_map, deunify;
8a73a704 1417{
3263d5a2
KH
1418 int id;
1419 struct charset *cs;
8f924df7 1420
3263d5a2
KH
1421 CHECK_CHARSET_GET_ID (charset, id);
1422 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1423 if (NILP (deunify)
1424 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1425 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1426 return Qnil;
dbbb237d 1427
3263d5a2 1428 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1429 if (NILP (deunify))
1430 {
ecca2aad
KH
1431 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1432 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1433 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1434 if (NILP (unify_map))
1435 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1436 else
ecca2aad
KH
1437 {
1438 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1439 signal_error ("Bad unify-map", unify_map);
1440 CHARSET_UNIFY_MAP (cs) = unify_map;
1441 }
1442 if (NILP (Vchar_unify_table))
1443 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1444 char_table_set_range (Vchar_unify_table,
1445 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1446 CHARSET_UNIFIED_P (cs) = 1;
1447 }
1448 else if (CHAR_TABLE_P (Vchar_unify_table))
1449 {
1450 int min_code = CHARSET_MIN_CODE (cs);
1451 int max_code = CHARSET_MAX_CODE (cs);
1452 int min_char = DECODE_CHAR (cs, min_code);
1453 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1454
dbbb237d
KH
1455 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1456 }
8f924df7 1457
3263d5a2 1458 return Qnil;
8a73a704
KH
1459}
1460
3fac5a51
KH
1461DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1462 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1463 doc: /*
d0cf2d48 1464Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1465DIMENSION is the number of bytes to represent a character: 1 or 2.
1466CHARS is the number of characters in a dimension: 94 or 96.
1467
1468This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1469If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1470return nil. */)
1471 (dimension, chars)
3fac5a51
KH
1472 Lisp_Object dimension, chars;
1473{
1474 int final_char;
1475
b7826503
PJ
1476 CHECK_NUMBER (dimension);
1477 CHECK_NUMBER (chars);
3263d5a2
KH
1478 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1479 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1480 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1481 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1482 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1483 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1484 break;
3fac5a51
KH
1485 return (final_char <= '?' ? make_number (final_char) : Qnil);
1486}
1487
3263d5a2
KH
1488static void
1489check_iso_charset_parameter (dimension, chars, final_char)
1490 Lisp_Object dimension, chars, final_char;
4ed46869 1491{
3263d5a2
KH
1492 CHECK_NATNUM (dimension);
1493 CHECK_NATNUM (chars);
1494 CHECK_NATNUM (final_char);
4ed46869 1495
3263d5a2
KH
1496 if (XINT (dimension) > 3)
1497 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1498 if (XINT (chars) != 94 && XINT (chars) != 96)
1499 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1500 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1501 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1502}
1503
1504
1505DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1506 4, 4, 0,
cefd8c4f
KH
1507 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1508
1509On decoding by an ISO-2022 base coding system, when a charset
1510specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1511if CHARSET is designated instead. */)
3263d5a2
KH
1512 (dimension, chars, final_char, charset)
1513 Lisp_Object dimension, chars, final_char, charset;
1514{
1515 int id;
82215ce9 1516 int chars_flag;
4ed46869 1517
3263d5a2
KH
1518 CHECK_CHARSET_GET_ID (charset, id);
1519 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1520 chars_flag = XINT (chars) == 96;
1521 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1522 return Qnil;
1523}
1524
3263d5a2 1525
2e344af3
KH
1526/* Return information about charsets in the text at PTR of NBYTES
1527 bytes, which are NCHARS characters. The value is:
f6302ac9 1528
cfe34140 1529 0: Each character is represented by one byte. This is always
3263d5a2
KH
1530 true for a unibyte string. For a multibyte string, true if
1531 it contains only ASCII characters.
1532
28c026cd
DL
1533 1: No charsets other than ascii, control-1, and latin-1 are
1534 found.
1d67c29b 1535
3263d5a2
KH
1536 2: Otherwise.
1537*/
4ed46869
KH
1538
1539int
3263d5a2
KH
1540string_xstring_p (string)
1541 Lisp_Object string;
4ed46869 1542{
8f924df7
KH
1543 const unsigned char *p = SDATA (string);
1544 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1545
8f924df7 1546 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1547 return 0;
1548
3263d5a2 1549 while (p < endp)
0282eb69 1550 {
3263d5a2 1551 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1552
3cc67a4d 1553 if (c >= 0x100)
3263d5a2 1554 return 2;
0282eb69 1555 }
3263d5a2
KH
1556 return 1;
1557}
05505664 1558
05505664 1559
3263d5a2 1560/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1561
3cc67a4d
KH
1562 CHARSETS is a vector. If Nth element is non-nil, it means the
1563 charset whose id is N is already found.
2e344af3 1564
3263d5a2 1565 It may lookup a translation table TABLE if supplied. */
2e344af3 1566
3263d5a2 1567static void
3cc67a4d 1568find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
dbbb237d 1569 const unsigned char *ptr;
42ca828e 1570 EMACS_INT nchars, nbytes;
3263d5a2 1571 Lisp_Object charsets, table;
3cc67a4d 1572 int multibyte;
3263d5a2 1573{
dbbb237d 1574 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1575
1576 if (nchars == nbytes)
3263d5a2 1577 {
3cc67a4d
KH
1578 if (multibyte)
1579 ASET (charsets, charset_ascii, Qt);
1580 else
1581 while (ptr < pend)
1582 {
1583 int c = *ptr++;
1584
1585 if (!NILP (table))
1586 c = translate_char (table, c);
1587 if (ASCII_BYTE_P (c))
1588 ASET (charsets, charset_ascii, Qt);
1589 else
1590 ASET (charsets, charset_eight_bit, Qt);
1591 }
1592 }
1593 else
1594 {
1595 while (ptr < pend)
3263d5a2 1596 {
3cc67a4d
KH
1597 int c = STRING_CHAR_ADVANCE (ptr);
1598 struct charset *charset;
3263d5a2 1599
3cc67a4d
KH
1600 if (!NILP (table))
1601 c = translate_char (table, c);
1602 charset = CHAR_CHARSET (c);
1603 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1604 }
4ed46869 1605 }
4ed46869
KH
1606}
1607
1608DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1609 2, 3, 0,
fdb82f93
PJ
1610 doc: /* Return a list of charsets in the region between BEG and END.
1611BEG and END are buffer positions.
1612Optional arg TABLE if non-nil is a translation table to look up.
1613
fdb82f93
PJ
1614If the current buffer is unibyte, the returned list may contain
1615only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1616 (beg, end, table)
23d2a7f1 1617 Lisp_Object beg, end, table;
4ed46869 1618{
3263d5a2 1619 Lisp_Object charsets;
42ca828e
DL
1620 EMACS_INT from, from_byte, to, stop, stop_byte;
1621 int i;
4ed46869 1622 Lisp_Object val;
3cc67a4d 1623 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1624
1625 validate_region (&beg, &end);
1626 from = XFASTINT (beg);
1627 stop = to = XFASTINT (end);
6ae1f27e 1628
4ed46869 1629 if (from < GPT && GPT < to)
6ae1f27e
RS
1630 {
1631 stop = GPT;
1632 stop_byte = GPT_BYTE;
1633 }
1634 else
1635 stop_byte = CHAR_TO_BYTE (stop);
1636
1637 from_byte = CHAR_TO_BYTE (from);
1638
3263d5a2 1639 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1640 while (1)
1641 {
3263d5a2 1642 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1643 stop_byte - from_byte, charsets, table,
1644 multibyte);
4ed46869 1645 if (stop < to)
6ae1f27e
RS
1646 {
1647 from = stop, from_byte = stop_byte;
1648 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1649 }
4ed46869
KH
1650 else
1651 break;
1652 }
6ae1f27e 1653
4ed46869 1654 val = Qnil;
3263d5a2 1655 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1656 if (!NILP (AREF (charsets, i)))
3263d5a2 1657 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1658 return val;
1659}
1660
1661DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1662 1, 2, 0,
fdb82f93
PJ
1663 doc: /* Return a list of charsets in STR.
1664Optional arg TABLE if non-nil is a translation table to look up.
1665
fdb82f93 1666If STR is unibyte, the returned list may contain
3263d5a2 1667only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1668 (str, table)
23d2a7f1 1669 Lisp_Object str, table;
4ed46869 1670{
3263d5a2 1671 Lisp_Object charsets;
4ed46869
KH
1672 int i;
1673 Lisp_Object val;
1674
b7826503 1675 CHECK_STRING (str);
87b089ad 1676
3263d5a2 1677 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1678 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1679 charsets, table,
1680 STRING_MULTIBYTE (str));
4ed46869 1681 val = Qnil;
3263d5a2 1682 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1683 if (!NILP (AREF (charsets, i)))
3263d5a2 1684 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1685 return val;
1686}
2e344af3 1687
4ed46869 1688\f
3263d5a2 1689
ecca2aad
KH
1690/* Return a unified character code for C (>= 0x110000). VAL is a
1691 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1692 charset symbol. */
1693int
1694maybe_unify_char (c, val)
1695 int c;
1696 Lisp_Object val;
1697{
1698 struct charset *charset;
1699
1700 if (INTEGERP (val))
1701 return XINT (val);
1702 if (NILP (val))
1703 return c;
1704
1705 CHECK_CHARSET_GET_CHARSET (val, charset);
1706 load_charset (charset, 1);
1707 if (! inhibit_load_charset_map)
1708 {
1709 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1710 if (! NILP (val))
1711 c = XINT (val);
1712 }
1713 else
1714 {
1715 int code_index = c - CHARSET_CODE_OFFSET (charset);
1716 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1717
1718 if (unified > 0)
1719 c = unified;
1720 }
1721 return c;
1722}
1723
1724
3263d5a2
KH
1725/* Return a character correponding to the code-point CODE of
1726 CHARSET. */
1727
1728int
1729decode_char (charset, code)
1730 struct charset *charset;
1731 unsigned code;
4ed46869 1732{
3263d5a2
KH
1733 int c, char_index;
1734 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1735
3263d5a2
KH
1736 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1737 return -1;
4ed46869 1738
374c5cfd 1739 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1740 {
374c5cfd
KH
1741 Lisp_Object subset_info;
1742
1743 subset_info = CHARSET_SUBSET (charset);
1744 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1745 code -= XINT (AREF (subset_info, 3));
1746 if (code < XFASTINT (AREF (subset_info, 1))
1747 || code > XFASTINT (AREF (subset_info, 2)))
1748 c = -1;
1749 else
1750 c = DECODE_CHAR (charset, code);
2e344af3 1751 }
374c5cfd 1752 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1753 {
3263d5a2 1754 Lisp_Object parents;
4ed46869 1755
374c5cfd 1756 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1757 c = -1;
1758 for (; CONSP (parents); parents = XCDR (parents))
1759 {
1760 int id = XINT (XCAR (XCAR (parents)));
1761 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1762 unsigned this_code = code - code_offset;
4ed46869 1763
3263d5a2
KH
1764 charset = CHARSET_FROM_ID (id);
1765 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1766 break;
1767 }
1768 }
1769 else
ac4137cc 1770 {
3263d5a2 1771 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1772 if (char_index < 0)
1773 return -1;
4ed46869 1774
3263d5a2 1775 if (method == CHARSET_METHOD_MAP)
ac4137cc 1776 {
3263d5a2 1777 Lisp_Object decoder;
4ed46869 1778
3263d5a2
KH
1779 decoder = CHARSET_DECODER (charset);
1780 if (! VECTORP (decoder))
ecca2aad
KH
1781 {
1782 load_charset (charset, 1);
1783 decoder = CHARSET_DECODER (charset);
1784 }
1785 if (VECTORP (decoder))
1786 c = XINT (AREF (decoder, char_index));
1787 else
1788 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1789 }
ecca2aad 1790 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1791 {
3263d5a2 1792 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1793 if (CHARSET_UNIFIED_P (charset)
1794 && c > MAX_UNICODE_CHAR)
1795 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1796 }
1797 }
4ed46869 1798
3263d5a2 1799 return c;
90d7b74e
KH
1800}
1801
374c5cfd
KH
1802/* Variable used temporarily by the macro ENCODE_CHAR. */
1803Lisp_Object charset_work;
4ed46869 1804
3263d5a2 1805/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1806 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1807 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1808
3263d5a2
KH
1809unsigned
1810encode_char (charset, c)
1811 struct charset *charset;
9b6a601f 1812 int c;
9d3d8cba 1813{
3263d5a2
KH
1814 unsigned code;
1815 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1816
3263d5a2 1817 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1818 {
6809ca75 1819 Lisp_Object deunifier;
ecca2aad 1820 int code_index = -1;
4ed46869 1821
3263d5a2
KH
1822 deunifier = CHARSET_DEUNIFIER (charset);
1823 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1824 {
ecca2aad 1825 load_charset (charset, 2);
3263d5a2 1826 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1827 }
ecca2aad
KH
1828 if (CHAR_TABLE_P (deunifier))
1829 {
1830 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1831
1832 if (INTEGERP (deunified))
1833 code_index = XINT (deunified);
1834 }
1835 else
1836 {
1837 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1838 }
1839 if (code_index >= 0)
1840 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1841 }
9d3d8cba 1842
374c5cfd
KH
1843 if (method == CHARSET_METHOD_SUBSET)
1844 {
1845 Lisp_Object subset_info;
1846 struct charset *this_charset;
1847
1848 subset_info = CHARSET_SUBSET (charset);
1849 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1850 code = ENCODE_CHAR (this_charset, c);
1851 if (code == CHARSET_INVALID_CODE (this_charset)
1852 || code < XFASTINT (AREF (subset_info, 1))
1853 || code > XFASTINT (AREF (subset_info, 2)))
1854 return CHARSET_INVALID_CODE (charset);
1855 code += XINT (AREF (subset_info, 3));
1856 return code;
1857 }
9d3d8cba 1858
374c5cfd 1859 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1860 {
3263d5a2 1861 Lisp_Object parents;
d2665018 1862
374c5cfd 1863 parents = CHARSET_SUPERSET (charset);
3263d5a2 1864 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1865 {
3263d5a2
KH
1866 int id = XINT (XCAR (XCAR (parents)));
1867 int code_offset = XINT (XCDR (XCAR (parents)));
1868 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1869
3263d5a2 1870 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1871 if (code != CHARSET_INVALID_CODE (this_charset))
1872 return code + code_offset;
beeedaad 1873 }
3263d5a2
KH
1874 return CHARSET_INVALID_CODE (charset);
1875 }
1bcc1567 1876
15c85a88
KH
1877 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1878 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1879 return CHARSET_INVALID_CODE (charset);
1bcc1567 1880
3263d5a2 1881 if (method == CHARSET_METHOD_MAP)
3f62427c 1882 {
3263d5a2 1883 Lisp_Object encoder;
beeedaad 1884 Lisp_Object val;
9b6a601f 1885
3263d5a2
KH
1886 encoder = CHARSET_ENCODER (charset);
1887 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1888 {
1889 load_charset (charset, 2);
1890 encoder = CHARSET_ENCODER (charset);
1891 }
1892 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1893 {
1894 val = CHAR_TABLE_REF (encoder, c);
1895 if (NILP (val))
1896 return CHARSET_INVALID_CODE (charset);
1897 code = XINT (val);
1898 if (! CHARSET_COMPACT_CODES_P (charset))
1899 code = INDEX_TO_CODE_POINT (charset, code);
1900 }
1901 else
1902 {
1903 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1904 code = INDEX_TO_CODE_POINT (charset, code);
1905 }
3263d5a2 1906 }
820ee249 1907 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1908 {
ecca2aad
KH
1909 int code_index = c - CHARSET_CODE_OFFSET (charset);
1910
1911 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1912 }
8ac5a9cc 1913
3263d5a2 1914 return code;
4ed46869
KH
1915}
1916
4ed46869 1917
3263d5a2
KH
1918DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1919 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1920Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1921
3263d5a2 1922CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1923
3263d5a2 1924Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1925and CODE-POINT to a character. Currently not supported and just ignored. */)
3263d5a2
KH
1926 (charset, code_point, restriction)
1927 Lisp_Object charset, code_point, restriction;
4ed46869 1928{
3263d5a2
KH
1929 int c, id;
1930 unsigned code;
1931 struct charset *charsetp;
859f2b3c 1932
3263d5a2
KH
1933 CHECK_CHARSET_GET_ID (charset, id);
1934 if (CONSP (code_point))
1935 {
8f924df7
KH
1936 CHECK_NATNUM_CAR (code_point);
1937 CHECK_NATNUM_CDR (code_point);
69f8de5b 1938 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1939 }
4ed46869
KH
1940 else
1941 {
3263d5a2
KH
1942 CHECK_NATNUM (code_point);
1943 code = XINT (code_point);
4ed46869 1944 }
3263d5a2
KH
1945 charsetp = CHARSET_FROM_ID (id);
1946 c = DECODE_CHAR (charsetp, code);
1947 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1948}
1949
859f2b3c 1950
3263d5a2
KH
1951DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1952 doc: /* Encode the character CH into a code-point of CHARSET.
1953Return nil if CHARSET doesn't include CH.
17e7ef1b 1954
d0cf2d48 1955Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2
KH
1956code-point in CCS. Currently not supported and just ignored. */)
1957 (ch, charset, restriction)
1958 Lisp_Object ch, charset, restriction;
4ed46869 1959{
16fed1fc 1960 int id;
3263d5a2
KH
1961 unsigned code;
1962 struct charset *charsetp;
046b1f03 1963
3263d5a2
KH
1964 CHECK_CHARSET_GET_ID (charset, id);
1965 CHECK_NATNUM (ch);
3263d5a2 1966 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1967 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1968 if (code == CHARSET_INVALID_CODE (charsetp))
1969 return Qnil;
1970 if (code > 0x7FFFFFF)
1971 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1972 return make_number (code);
beeedaad
KH
1973}
1974
beeedaad 1975
b121a744
KH
1976DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1977 doc:
1978 /* Return a character of CHARSET whose position codes are CODEn.
1979
1980CODE1 through CODE4 are optional, but if you don't supply sufficient
1981position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1982is specified. */)
b121a744
KH
1983 (charset, code1, code2, code3, code4)
1984 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1985{
3263d5a2
KH
1986 int id, dimension;
1987 struct charset *charsetp;
b121a744
KH
1988 unsigned code;
1989 int c;
87b089ad 1990
3263d5a2
KH
1991 CHECK_CHARSET_GET_ID (charset, id);
1992 charsetp = CHARSET_FROM_ID (id);
4ed46869 1993
b121a744
KH
1994 dimension = CHARSET_DIMENSION (charsetp);
1995 if (NILP (code1))
d47073ca
KH
1996 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1997 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1998 else
859f2b3c 1999 {
b121a744
KH
2000 CHECK_NATNUM (code1);
2001 if (XFASTINT (code1) >= 0x100)
2002 args_out_of_range (make_number (0xFF), code1);
2003 code = XFASTINT (code1);
859f2b3c 2004
b0a1e45e 2005 if (dimension > 1)
beeedaad 2006 {
b121a744 2007 code <<= 8;
b0a1e45e
KH
2008 if (NILP (code2))
2009 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 2010 else
b121a744 2011 {
b0a1e45e
KH
2012 CHECK_NATNUM (code2);
2013 if (XFASTINT (code2) >= 0x100)
2014 args_out_of_range (make_number (0xFF), code2);
2015 code |= XFASTINT (code2);
b121a744 2016 }
99529c2c 2017
b0a1e45e 2018 if (dimension > 2)
b121a744
KH
2019 {
2020 code <<= 8;
b0a1e45e
KH
2021 if (NILP (code3))
2022 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
2023 else
2024 {
b0a1e45e
KH
2025 CHECK_NATNUM (code3);
2026 if (XFASTINT (code3) >= 0x100)
2027 args_out_of_range (make_number (0xFF), code3);
2028 code |= XFASTINT (code3);
2029 }
2030
2031 if (dimension > 3)
2032 {
2033 code <<= 8;
2034 if (NILP (code4))
2035 code |= charsetp->code_space[0];
2036 else
2037 {
2038 CHECK_NATNUM (code4);
2039 if (XFASTINT (code4) >= 0x100)
2040 args_out_of_range (make_number (0xFF), code4);
2041 code |= XFASTINT (code4);
2042 }
b121a744
KH
2043 }
2044 }
beeedaad 2045 }
859f2b3c 2046 }
beeedaad 2047
b121a744
KH
2048 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2049 code &= 0x7F7F7F7F;
2050 c = DECODE_CHAR (charsetp, code);
2051 if (c < 0)
2052 error ("Invalid code(s)");
3263d5a2 2053 return make_number (c);
4ed46869
KH
2054}
2055
beeedaad 2056
3263d5a2
KH
2057/* Return the first charset in CHARSET_LIST that contains C.
2058 CHARSET_LIST is a list of charset IDs. If it is nil, use
2059 Vcharset_ordered_list. */
beeedaad 2060
3263d5a2
KH
2061struct charset *
2062char_charset (c, charset_list, code_return)
2063 int c;
2064 Lisp_Object charset_list;
2065 unsigned *code_return;
2e344af3 2066{
8a1816bb
KH
2067 int maybe_null = 0;
2068
3263d5a2
KH
2069 if (NILP (charset_list))
2070 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2071 else
2072 maybe_null = 1;
beeedaad 2073
6c652beb 2074 while (CONSP (charset_list))
2e344af3 2075 {
3263d5a2
KH
2076 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2077 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2078
3263d5a2 2079 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2080 {
3263d5a2
KH
2081 if (code_return)
2082 *code_return = code;
2083 return charset;
3f62427c 2084 }
3263d5a2 2085 charset_list = XCDR (charset_list);
6c652beb
KH
2086 if (c <= MAX_UNICODE_CHAR
2087 && EQ (charset_list, Vcharset_non_preferred_head))
2088 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2089 }
8a1816bb
KH
2090 return (maybe_null ? NULL
2091 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2092 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2093}
2094
2e344af3 2095
3263d5a2 2096DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2097 doc:
d0cf2d48 2098 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2099The charset is decided by the current priority order of charsets.
2100A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2101CH in the charset. */)
3263d5a2
KH
2102 (ch)
2103 Lisp_Object ch;
4ed46869 2104{
3263d5a2
KH
2105 struct charset *charset;
2106 int c, dimension;
2107 unsigned code;
4ed46869
KH
2108 Lisp_Object val;
2109
3263d5a2
KH
2110 CHECK_CHARACTER (ch);
2111 c = XFASTINT (ch);
2112 charset = CHAR_CHARSET (c);
2113 if (! charset)
3cc67a4d 2114 abort ();
3263d5a2
KH
2115 code = ENCODE_CHAR (charset, c);
2116 if (code == CHARSET_INVALID_CODE (charset))
2117 abort ();
2118 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2119 for (val = Qnil; dimension > 0; dimension--)
2120 {
2121 val = Fcons (make_number (code & 0xFF), val);
2122 code >>= 8;
2123 }
3263d5a2 2124 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2125}
2126
740f080d 2127
4cb75c4b
KH
2128DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2129 doc: /* Return the charset of highest priority that contains CH.
2130If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2131from which to find the charset. It may also be a coding system. In
2132that case, find the charset from what supported by that coding system. */)
2133 (ch, restriction)
2134 Lisp_Object ch, restriction;
4ed46869 2135{
3263d5a2 2136 struct charset *charset;
4ed46869 2137
3263d5a2 2138 CHECK_CHARACTER (ch);
4cb75c4b
KH
2139 if (NILP (restriction))
2140 charset = CHAR_CHARSET (XINT (ch));
2141 else
2142 {
4cb75c4b
KH
2143 if (CONSP (restriction))
2144 {
7a84eee5
KH
2145 int c = XFASTINT (ch);
2146
2147 for (; CONSP (restriction); restriction = XCDR (restriction))
4cb75c4b 2148 {
7a84eee5 2149 struct charset *charset;
4cb75c4b 2150
7a84eee5
KH
2151 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), charset);
2152 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
2153 return XCAR (restriction);
4cb75c4b 2154 }
7a84eee5 2155 return Qnil;
4cb75c4b 2156 }
7a84eee5
KH
2157 restriction = coding_system_charset_list (restriction);
2158 charset = char_charset (XINT (ch), restriction, NULL);
4cb75c4b
KH
2159 if (! charset)
2160 return Qnil;
2161 }
3263d5a2 2162 return (CHARSET_NAME (charset));
4ed46869
KH
2163}
2164
17e7ef1b 2165
3263d5a2
KH
2166DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2167 doc: /*
2168Return charset of a character in the current buffer at position POS.
2169If POS is nil, it defauls to the current point.
2170If POS is out of range, the value is nil. */)
2171 (pos)
2172 Lisp_Object pos;
2e344af3 2173{
3263d5a2
KH
2174 Lisp_Object ch;
2175 struct charset *charset;
046b1f03 2176
3263d5a2
KH
2177 ch = Fchar_after (pos);
2178 if (! INTEGERP (ch))
2179 return ch;
2180 charset = CHAR_CHARSET (XINT (ch));
2181 return (CHARSET_NAME (charset));
6ae1f27e 2182}
9036eb45 2183
87b089ad 2184
3263d5a2
KH
2185DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2186 doc: /*
2187Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2188
2189ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2191whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2192See the documentation of the function `charset-info' for the meanings of
2193DIMENSION, CHARS, and FINAL-CHAR. */)
2194 (dimension, chars, final_char)
2195 Lisp_Object dimension, chars, final_char;
6ae1f27e 2196{
3263d5a2 2197 int id;
82215ce9 2198 int chars_flag;
a8a35e61 2199
3263d5a2 2200 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2201 chars_flag = XFASTINT (chars) == 96;
2202 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2203 XFASTINT (final_char));
2204 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2205}
2206
87b089ad 2207
3263d5a2
KH
2208DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2209 0, 0, 0,
2210 doc: /*
ecca2aad
KH
2211Internal use only.
2212Clear temporary charset mapping tables.
2213It should be called only from temacs invoked for dumping. */)
3263d5a2 2214 ()
87b089ad 2215{
ecca2aad 2216 if (temp_charset_work)
87b089ad 2217 {
ecca2aad
KH
2218 free (temp_charset_work);
2219 temp_charset_work = NULL;
2e344af3 2220 }
2e344af3 2221
ecca2aad
KH
2222 if (CHAR_TABLE_P (Vchar_unify_table))
2223 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2224
3263d5a2 2225 return Qnil;
740f080d
KH
2226}
2227
8ddf5e57
DL
2228DEFUN ("charset-priority-list", Fcharset_priority_list,
2229 Scharset_priority_list, 0, 1, 0,
2230 doc: /* Return the list of charsets ordered by priority.
2231HIGHESTP non-nil means just return the highest priority one. */)
2232 (highestp)
2233 Lisp_Object highestp;
2e344af3 2234{
8ddf5e57 2235 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2236
8ddf5e57 2237 if (!NILP (highestp))
16fed1fc 2238 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2239
8ddf5e57 2240 while (!NILP (list))
2e344af3 2241 {
16fed1fc 2242 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2243 list = XCDR (list);
2e344af3 2244 }
8ddf5e57 2245 return Fnreverse (val);
2e344af3
KH
2246}
2247
8ddf5e57
DL
2248DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2249 1, MANY, 0,
2250 doc: /* Assign higher priority to the charsets given as arguments.
2251usage: (set-charset-priority &rest charsets) */)
2252 (nargs, args)
2253 int nargs;
4ed46869
KH
2254 Lisp_Object *args;
2255{
af7c60ca 2256 Lisp_Object new_head, old_list, arglist[2];
321c819c 2257 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2258 int i, id;
4ed46869 2259
8ddf5e57 2260 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2261 new_head = Qnil;
8ddf5e57 2262 for (i = 0; i < nargs; i++)
4ed46869 2263 {
8ddf5e57 2264 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2265 if (! NILP (Fmemq (make_number (id), old_list)))
2266 {
2267 old_list = Fdelq (make_number (id), old_list);
2268 new_head = Fcons (make_number (id), new_head);
2269 }
5729c92f 2270 }
8ddf5e57 2271 arglist[0] = Fnreverse (new_head);
6a9c90ec 2272 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2273 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2274 charset_ordered_list_tick++;
5729c92f 2275
6809ca75 2276 charset_unibyte = -1;
321c819c 2277 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2278 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2279 {
e77415b0 2280 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2281 list_2022 = Fcons (XCAR (old_list), list_2022);
2282 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2283 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2284 if (charset_unibyte < 0)
2285 {
2286 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2287
2288 if (CHARSET_DIMENSION (charset) == 1
2289 && CHARSET_ASCII_COMPATIBLE_P (charset)
2290 && CHARSET_MAX_CHAR (charset) >= 0x80)
2291 charset_unibyte = CHARSET_ID (charset);
2292 }
4ed46869 2293 }
321c819c
KH
2294 Viso_2022_charset_list = Fnreverse (list_2022);
2295 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2296 if (charset_unibyte < 0)
2297 charset_unibyte = charset_iso_8859_1;
4ed46869 2298
8ddf5e57 2299 return Qnil;
4ed46869
KH
2300}
2301
d5b33309
KH
2302DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2303 0, 1, 0,
2304 doc: /* Internal use only.
2305Return charset identification number of CHARSET. */)
2306 (charset)
2307 Lisp_Object charset;
4ed46869 2308{
d5b33309 2309 int id;
4ed46869 2310
d5b33309
KH
2311 CHECK_CHARSET_GET_ID (charset, id);
2312 return make_number (id);
4ed46869
KH
2313}
2314
7a84eee5
KH
2315struct charset_sort_data
2316{
2317 Lisp_Object charset;
2318 int id;
2319 int priority;
2320};
2321
2322static int
2323charset_compare (const void *d1, const void *d2)
2324{
2325 const struct charset_sort_data *data1 = d1, *data2 = d2;
2326 return (data1->priority - data2->priority);
2327}
2328
2329DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2330 doc: /* Sort charset list CHARSETS by a priority of each charset.
2331Return the sorted list. CHARSETS is modified by side effects.
2332See also `charset-priority-list' and `set-charset-priority'. */)
2333 (Lisp_Object charsets)
2334{
2335 Lisp_Object len = Flength (charsets);
2336 int n = XFASTINT (len), i, j, done;
2337 Lisp_Object tail, elt, attrs;
2338 struct charset_sort_data *sort_data;
2339 int id, min_id, max_id;
2340 USE_SAFE_ALLOCA;
2341
2342 if (n == 0)
2343 return Qnil;
2344 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2345 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2346 {
2347 elt = XCAR (tail);
2348 CHECK_CHARSET_GET_ATTR (elt, attrs);
2349 sort_data[i].charset = elt;
2350 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2351 if (i == 0)
2352 min_id = max_id = id;
2353 else if (id < min_id)
2354 min_id = id;
2355 else if (id > max_id)
2356 max_id = id;
2357 }
2358 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2359 done < n && CONSP (tail); tail = XCDR (tail), i++)
2360 {
2361 elt = XCAR (tail);
2362 id = XFASTINT (elt);
2363 if (id >= min_id && id <= max_id)
2364 for (j = 0; j < n; j++)
2365 if (sort_data[j].id == id)
2366 {
2367 sort_data[j].priority = i;
2368 done++;
2369 }
2370 }
2371 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2372 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2373 XSETCAR (tail, sort_data[i].charset);
2374 SAFE_FREE ();
2375 return charsets;
2376}
2377
4ed46869 2378\f
3263d5a2
KH
2379void
2380init_charset ()
4ed46869 2381{
c8f94403
GM
2382 Lisp_Object tempdir;
2383 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
6809ca75 2384 if (access ((char *) SDATA (tempdir), 0) < 0)
c8f94403
GM
2385 {
2386 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2387Emacs will not function correctly without the character map files.\n\
2388Please check your installation!\n",
2389 tempdir);
2390 /* TODO should this be a fatal error? (Bug#909) */
2391 }
2392
2393 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2394}
2395
4ed46869 2396
dfcf069d 2397void
4ed46869
KH
2398init_charset_once ()
2399{
2400 int i, j, k;
2401
3263d5a2
KH
2402 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2403 for (j = 0; j < ISO_MAX_CHARS; j++)
2404 for (k = 0; k < ISO_MAX_FINAL; k++)
2405 iso_charset_table[i][j][k] = -1;
4ed46869 2406
60383934 2407 for (i = 0; i < 256; i++)
b84ae584 2408 emacs_mule_charset[i] = -1;
4ed46869 2409
7c7dceee
KH
2410 charset_jisx0201_roman = -1;
2411 charset_jisx0208_1978 = -1;
2412 charset_jisx0208 = -1;
d32320c4 2413 charset_ksc5601 = -1;
4ed46869
KH
2414}
2415
2416#ifdef emacs
2417
dfcf069d 2418void
4ed46869
KH
2419syms_of_charset ()
2420{
3263d5a2
KH
2421 DEFSYM (Qcharsetp, "charsetp");
2422
2423 DEFSYM (Qascii, "ascii");
2424 DEFSYM (Qunicode, "unicode");
6c652beb 2425 DEFSYM (Qemacs, "emacs");
2fe1edd1 2426 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2427 DEFSYM (Qiso_8859_1, "iso-8859-1");
2428
2429 DEFSYM (Qgl, "gl");
2430 DEFSYM (Qgr, "gr");
2431
3263d5a2
KH
2432 staticpro (&Vcharset_ordered_list);
2433 Vcharset_ordered_list = Qnil;
2434
2435 staticpro (&Viso_2022_charset_list);
2436 Viso_2022_charset_list = Qnil;
2437
2438 staticpro (&Vemacs_mule_charset_list);
2439 Vemacs_mule_charset_list = Qnil;
2440
3943ed76
KH
2441 /* Don't staticpro them here. It's done in syms_of_fns. */
2442 QCtest = intern (":test");
2443 Qeq = intern ("eq");
2444
3263d5a2 2445 staticpro (&Vcharset_hash_table);
8f924df7
KH
2446 {
2447 Lisp_Object args[2];
2448 args[0] = QCtest;
2449 args[1] = Qeq;
2450 Vcharset_hash_table = Fmake_hash_table (2, args);
2451 }
3263d5a2
KH
2452
2453 charset_table_size = 128;
2454 charset_table = ((struct charset *)
2455 xmalloc (sizeof (struct charset) * charset_table_size));
2456 charset_table_used = 0;
2457
3263d5a2
KH
2458 defsubr (&Scharsetp);
2459 defsubr (&Smap_charset_chars);
2460 defsubr (&Sdefine_charset_internal);
2461 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2462 defsubr (&Scharset_plist);
2463 defsubr (&Sset_charset_plist);
2464 defsubr (&Sunify_charset);
3fac5a51 2465 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2466 defsubr (&Sdeclare_equiv_charset);
2467 defsubr (&Sfind_charset_region);
2468 defsubr (&Sfind_charset_string);
3263d5a2
KH
2469 defsubr (&Sdecode_char);
2470 defsubr (&Sencode_char);
4ed46869 2471 defsubr (&Ssplit_char);
3263d5a2 2472 defsubr (&Smake_char);
4ed46869 2473 defsubr (&Schar_charset);
90d7b74e 2474 defsubr (&Scharset_after);
4ed46869 2475 defsubr (&Siso_charset);
3263d5a2 2476 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2477 defsubr (&Scharset_priority_list);
2478 defsubr (&Sset_charset_priority);
d5b33309 2479 defsubr (&Scharset_id_internal);
7a84eee5 2480 defsubr (&Ssort_charsets);
3263d5a2 2481
4beef065 2482 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2483 doc: /* *List of directories to search for charset map files. */);
4beef065 2484 Vcharset_map_path = Qnil;
4ed46869 2485
ecca2aad
KH
2486 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2487 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2488 inhibit_load_charset_map = 0;
2489
4ed46869 2490 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2491 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2492 Vcharset_list = Qnil;
2493
6a9c90ec
KH
2494 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2495 doc: /* ISO639 language mnemonic symbol for the current language environment.
2496If the current language environment is for multiple languages (e.g. "Latin-1"),
2497the value may be a list of mnemonics. */);
2498 Vcurrent_iso639_language = Qnil;
2499
2fe1edd1
KH
2500 charset_ascii
2501 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2502 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2503 charset_iso_8859_1
2504 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2505 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2506 charset_unicode
73fbf2d9 2507 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2508 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2509 charset_emacs
2510 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2511 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2512 charset_eight_bit
2513 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2514 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2515 MAX_5_BYTE_CHAR + 1);
6809ca75 2516 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2517}
2518
2519#endif /* emacs */
cefd8c4f
KH
2520
2521/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2522 (do not change this comment) */