(fatal): Add a final \n if needed (bug#5596).
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
114f9c96 3 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 5 2005, 2006, 2007, 2008, 2009, 2010
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
d7306fe6 34#include <setjmp.h>
4ed46869 35#include "lisp.h"
3263d5a2 36#include "character.h"
4ed46869
KH
37#include "charset.h"
38#include "coding.h"
fc6b09bf 39#include "disptab.h"
3263d5a2 40#include "buffer.h"
4ed46869 41
04c2f2c5 42/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 43
3263d5a2 44 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 45 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
4ed46869 49
3263d5a2
KH
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
04c2f2c5 53 charset_table as a struct charset.
4ed46869 54
3263d5a2 55*/
4ed46869 56
3263d5a2
KH
57/* List of all charsets. This variable is used only from Emacs
58 Lisp. */
4ed46869 59Lisp_Object Vcharset_list;
4ed46869 60
3263d5a2
KH
61/* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63Lisp_Object Vcharset_hash_table;
4ed46869 64
3263d5a2
KH
65/* Table of struct charset. */
66struct charset *charset_table;
4ed46869 67
3263d5a2 68static int charset_table_size;
5af5dd92 69static int charset_table_used;
4ed46869 70
3263d5a2 71Lisp_Object Qcharsetp;
4ed46869 72
3263d5a2
KH
73/* Special charset symbols. */
74Lisp_Object Qascii;
2fe1edd1 75Lisp_Object Qeight_bit;
3263d5a2
KH
76Lisp_Object Qiso_8859_1;
77Lisp_Object Qunicode;
6c652beb 78Lisp_Object Qemacs;
4ed46869 79
3263d5a2
KH
80/* The corresponding charsets. */
81int charset_ascii;
2fe1edd1 82int charset_eight_bit;
3263d5a2
KH
83int charset_iso_8859_1;
84int charset_unicode;
6c652beb 85int charset_emacs;
b0e3cf2b 86
7c7dceee
KH
87/* The other special charsets. */
88int charset_jisx0201_roman;
89int charset_jisx0208_1978;
90int charset_jisx0208;
d32320c4 91int charset_ksc5601;
c1a08b4c 92
3263d5a2
KH
93/* Value of charset attribute `charset-iso-plane'. */
94Lisp_Object Qgl, Qgr;
c1a08b4c 95
d1a04588
KH
96/* Charset of unibyte characters. */
97int charset_unibyte;
4ed46869 98
3263d5a2
KH
99/* List of charsets ordered by the priority. */
100Lisp_Object Vcharset_ordered_list;
4ed46869 101
6a9c90ec
KH
102/* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104Lisp_Object Vcharset_non_preferred_head;
105
dbbb237d 106/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 107 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
108 -1. */
109unsigned short charset_ordered_list_tick;
4ed46869 110
3263d5a2
KH
111/* List of iso-2022 charsets. */
112Lisp_Object Viso_2022_charset_list;
35e623fb 113
3263d5a2
KH
114/* List of emacs-mule charsets. */
115Lisp_Object Vemacs_mule_charset_list;
116
117struct charset *emacs_mule_charset[256];
4ed46869
KH
118
119/* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
121int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
4beef065 123Lisp_Object Vcharset_map_path;
3263d5a2 124
ecca2aad
KH
125/* If nonzero, don't load charset maps. */
126int inhibit_load_charset_map;
3263d5a2 127
6a9c90ec
KH
128Lisp_Object Vcurrent_iso639_language;
129
64165ae2
DL
130/* Defined in chartab.c */
131extern void
132map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to));
136
69f8de5b
KH
137#define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
820ee249
KH
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
3263d5a2
KH
152 : -1)
153
154
155/* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
820ee249
KH
158#define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
4cf9710d 171
ecca2aad
KH
172/* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
8a73a704 174
ecca2aad
KH
175static struct
176{
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205} *temp_charset_work;
206
207#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
046b1f03 229\f
93bcb785 230
e9ce014c
KH
231/* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
3263d5a2 233int charset_map_loaded;
4ed46869 234
e9ce014c 235struct charset_map_entries
4ed46869 236{
e9ce014c
KH
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242};
243
ecca2aad
KH
244/* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
4cf9710d 253
ecca2aad
KH
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 259
ecca2aad 260 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 261
ecca2aad
KH
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267*/
4ed46869 268
3263d5a2 269static void
e9ce014c 270load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 271 struct charset *charset;
e9ce014c
KH
272 struct charset_map_entries *entries;
273 int n_entries;
3263d5a2 274 int control_flag;
4ed46869 275{
3263d5a2 276 Lisp_Object vec, table;
3263d5a2
KH
277 unsigned max_code = CHARSET_MAX_CODE (charset);
278 int ascii_compatible_p = charset->ascii_compatible_p;
279 int min_char, max_char, nonascii_min_char;
3263d5a2 280 int i;
3263d5a2 281 unsigned char *fast_map = charset->fast_map;
99529c2c 282
e9ce014c
KH
283 if (n_entries <= 0)
284 return;
285
ecca2aad 286 if (control_flag)
8ac5a9cc 287 {
ecca2aad
KH
288 if (! inhibit_load_charset_map)
289 {
290 if (control_flag == 1)
291 {
292 if (charset->method == CHARSET_METHOD_MAP)
293 {
294 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 295
ecca2aad
KH
296 vec = CHARSET_DECODER (charset)
297 = Fmake_vector (make_number (n), make_number (-1));
298 }
299 else
300 {
301 char_table_set_range (Vchar_unify_table,
302 charset->min_char, charset->max_char,
303 Qnil);
304 }
305 }
306 else
307 {
308 table = Fmake_char_table (Qnil, Qnil);
309 if (charset->method == CHARSET_METHOD_MAP)
310 CHARSET_ENCODER (charset) = table;
311 else
312 CHARSET_DEUNIFIER (charset) = table;
313 }
314 }
315 else
316 {
317 if (! temp_charset_work)
318 temp_charset_work = malloc (sizeof (*temp_charset_work));
319 if (control_flag == 1)
320 {
321 memset (temp_charset_work->table.decoder, -1,
322 sizeof (int) * 0x10000);
ecca2aad
KH
323 }
324 else
325 {
326 memset (temp_charset_work->table.encoder, 0,
327 sizeof (unsigned short) * 0x20000);
328 temp_charset_work->zero_index_char = -1;
329 }
330 temp_charset_work->current = charset;
331 temp_charset_work->for_encoder = (control_flag == 2);
332 control_flag += 2;
333 }
3263d5a2 334 charset_map_loaded = 1;
2e344af3 335 }
6662e69b 336
e9ce014c 337 min_char = max_char = entries->entry[0].c;
3263d5a2 338 nonascii_min_char = MAX_CHAR;
e9ce014c 339 for (i = 0; i < n_entries; i++)
2e344af3 340 {
e9ce014c 341 unsigned from, to;
3b4f4446
KH
342 int from_index, to_index;
343 int from_c, to_c;
e9ce014c 344 int idx = i % 0x10000;
3263d5a2 345
e9ce014c
KH
346 if (i > 0 && idx == 0)
347 entries = entries->next;
348 from = entries->entry[idx].from;
349 to = entries->entry[idx].to;
3b4f4446
KH
350 from_c = entries->entry[idx].c;
351 from_index = CODE_POINT_TO_INDEX (charset, from);
352 if (from == to)
6662e69b 353 {
3b4f4446
KH
354 to_index = from_index;
355 to_c = from_c;
6662e69b 356 }
3b4f4446 357 else
6662e69b 358 {
3b4f4446
KH
359 to_index = CODE_POINT_TO_INDEX (charset, to);
360 to_c = from_c + (to_index - from_index);
6662e69b 361 }
3b4f4446
KH
362 if (from_index < 0 || to_index < 0)
363 continue;
3263d5a2 364
ecca2aad
KH
365 if (to_c > max_char)
366 max_char = to_c;
367 else if (from_c < min_char)
368 min_char = from_c;
6662e69b 369
ecca2aad
KH
370 if (control_flag == 1)
371 {
372 if (charset->method == CHARSET_METHOD_MAP)
373 for (; from_index <= to_index; from_index++, from_c++)
374 ASET (vec, from_index, make_number (from_c));
375 else
376 for (; from_index <= to_index; from_index++, from_c++)
377 CHAR_TABLE_SET (Vchar_unify_table,
378 CHARSET_CODE_OFFSET (charset) + from_index,
379 make_number (from_c));
380 }
381 else if (control_flag == 2)
382 {
383 if (charset->method == CHARSET_METHOD_MAP
384 && CHARSET_COMPACT_CODES_P (charset))
385 for (; from_index <= to_index; from_index++, from_c++)
386 {
387 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
388
389 if (NILP (CHAR_TABLE_REF (table, from_c)))
390 CHAR_TABLE_SET (table, from_c, make_number (code));
391 }
392 else
393 for (; from_index <= to_index; from_index++, from_c++)
394 {
395 if (NILP (CHAR_TABLE_REF (table, from_c)))
396 CHAR_TABLE_SET (table, from_c, make_number (from_index));
397 }
398 }
399 else if (control_flag == 3)
400 for (; from_index <= to_index; from_index++, from_c++)
401 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
402 else if (control_flag == 4)
403 for (; from_index <= to_index; from_index++, from_c++)
404 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
405 else /* control_flag == 0 */
406 {
3b4f4446
KH
407 if (ascii_compatible_p)
408 {
409 if (! ASCII_BYTE_P (from_c))
410 {
411 if (from_c < nonascii_min_char)
412 nonascii_min_char = from_c;
413 }
414 else if (! ASCII_BYTE_P (to_c))
415 {
416 nonascii_min_char = 0x80;
417 }
418 }
177c0ea7 419
ecca2aad
KH
420 for (; from_c <= to_c; from_c++)
421 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 422 }
8ac5a9cc 423 }
3263d5a2 424
ecca2aad 425 if (control_flag == 0)
4ed46869 426 {
3263d5a2
KH
427 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
428 ? nonascii_min_char : min_char);
429 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 430 }
ecca2aad
KH
431 else if (control_flag == 4)
432 {
433 temp_charset_work->min_char = min_char;
434 temp_charset_work->max_char = max_char;
435 }
4ed46869
KH
436}
437
12bcae05 438
3263d5a2
KH
439/* Read a hexadecimal number (preceded by "0x") from the file FP while
440 paying attention to comment charcter '#'. */
12bcae05 441
3263d5a2
KH
442static INLINE unsigned
443read_hex (fp, eof)
444 FILE *fp;
445 int *eof;
12bcae05 446{
3263d5a2
KH
447 int c;
448 unsigned n;
12bcae05 449
3263d5a2
KH
450 while ((c = getc (fp)) != EOF)
451 {
69f8de5b 452 if (c == '#')
3263d5a2
KH
453 {
454 while ((c = getc (fp)) != EOF && c != '\n');
455 }
456 else if (c == '0')
457 {
458 if ((c = getc (fp)) == EOF || c == 'x')
459 break;
460 }
8f924df7 461 }
3263d5a2
KH
462 if (c == EOF)
463 {
464 *eof = 1;
465 return 0;
466 }
467 *eof = 0;
468 n = 0;
469 if (c == 'x')
470 while ((c = getc (fp)) != EOF && isxdigit (c))
471 n = ((n << 4)
472 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
473 else
474 while ((c = getc (fp)) != EOF && isdigit (c))
475 n = (n * 10) + c - '0';
e9ce014c
KH
476 if (c != EOF)
477 ungetc (c, fp);
3263d5a2
KH
478 return n;
479}
12bcae05 480
dde2559c 481extern Lisp_Object Qfile_name_handler_alist;
537efd8d 482
3263d5a2 483/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
484 Each line of MAPFILE has this form
485 0xAAAA 0xCCCC
486 where 0xAAAA is a code-point and 0xCCCC is the corresponding
487 character code, or this form
488 0xAAAA-0xBBBB 0xCCCC
489 where 0xAAAA and 0xBBBB are code-points specifying a range, and
490 0xCCCC is the first character code of the range.
4ed46869 491
3263d5a2
KH
492 The returned vector has this form:
493 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 494 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
495 range.
496
18a10a21
JB
497 Note that this function uses `openp' to open MAPFILE but ignores
498 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 499
c449997d 500extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
4ed46869 501
e9ce014c
KH
502static void
503load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
504 struct charset *charset;
505 Lisp_Object mapfile;
e9ce014c 506 int control_flag;
4ed46869 507{
e9ce014c
KH
508 unsigned min_code = CHARSET_MIN_CODE (charset);
509 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
510 int fd;
511 FILE *fp;
3263d5a2
KH
512 int eof;
513 Lisp_Object suffixes;
e9ce014c 514 struct charset_map_entries *head, *entries;
a2f3eb19
CY
515 int n_entries, count;
516 USE_SAFE_ALLOCA;
4ed46869 517
3263d5a2
KH
518 suffixes = Fcons (build_string (".map"),
519 Fcons (build_string (".TXT"), Qnil));
4ed46869 520
a2f3eb19 521 count = SPECPDL_INDEX ();
dde2559c 522 specbind (Qfile_name_handler_alist, Qnil);
4beef065 523 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 524 unbind_to (count, Qnil);
3263d5a2
KH
525 if (fd < 0
526 || ! (fp = fdopen (fd, "r")))
8458d4c1 527 error ("Failure in loading charset map: %S", SDATA (mapfile));
4ed46869 528
a2f3eb19
CY
529 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
530 large (larger than MAX_ALLOCA). */
531 SAFE_ALLOCA (head, struct charset_map_entries *,
532 sizeof (struct charset_map_entries));
533 entries = head;
534
e9ce014c 535 n_entries = 0;
3263d5a2
KH
536 eof = 0;
537 while (1)
538 {
e9ce014c
KH
539 unsigned from, to;
540 int c;
541 int idx;
4ed46869 542
e9ce014c 543 from = read_hex (fp, &eof);
3263d5a2
KH
544 if (eof)
545 break;
e9ce014c
KH
546 if (getc (fp) == '-')
547 to = read_hex (fp, &eof);
548 else
549 to = from;
550 c = (int) read_hex (fp, &eof);
ac4137cc 551
e9ce014c
KH
552 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
553 continue;
ac4137cc 554
e9ce014c 555 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 556 {
a2f3eb19
CY
557 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
558 sizeof (struct charset_map_entries));
e9ce014c 559 entries = entries->next;
3263d5a2 560 }
e9ce014c
KH
561 idx = n_entries % 0x10000;
562 entries->entry[idx].from = from;
563 entries->entry[idx].to = to;
564 entries->entry[idx].c = c;
565 n_entries++;
3263d5a2
KH
566 }
567 fclose (fp);
568 close (fd);
177c0ea7 569
e9ce014c 570 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 571 SAFE_FREE ();
4ed46869
KH
572}
573
e9ce014c
KH
574static void
575load_charset_map_from_vector (charset, vec, control_flag)
576 struct charset *charset;
577 Lisp_Object vec;
578 int control_flag;
23d2a7f1 579{
e9ce014c
KH
580 unsigned min_code = CHARSET_MIN_CODE (charset);
581 unsigned max_code = CHARSET_MAX_CODE (charset);
582 struct charset_map_entries *head, *entries;
583 int n_entries;
584 int len = ASIZE (vec);
585 int i;
a2f3eb19 586 USE_SAFE_ALLOCA;
23d2a7f1 587
e9ce014c 588 if (len % 2 == 1)
3263d5a2 589 {
e9ce014c
KH
590 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
591 return;
3263d5a2 592 }
35e623fb 593
a2f3eb19
CY
594 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
595 large (larger than MAX_ALLOCA). */
596 SAFE_ALLOCA (head, struct charset_map_entries *,
597 sizeof (struct charset_map_entries));
598 entries = head;
599
e9ce014c
KH
600 n_entries = 0;
601 for (i = 0; i < len; i += 2)
35e623fb 602 {
e9ce014c
KH
603 Lisp_Object val, val2;
604 unsigned from, to;
605 int c;
606 int idx;
d2665018 607
e9ce014c
KH
608 val = AREF (vec, i);
609 if (CONSP (val))
bbf12bb3 610 {
e9ce014c
KH
611 val2 = XCDR (val);
612 val = XCAR (val);
613 CHECK_NATNUM (val);
614 CHECK_NATNUM (val2);
615 from = XFASTINT (val);
616 to = XFASTINT (val2);
bbf12bb3 617 }
e9ce014c 618 else
bbf12bb3 619 {
e9ce014c
KH
620 CHECK_NATNUM (val);
621 from = to = XFASTINT (val);
bbf12bb3 622 }
e9ce014c
KH
623 val = AREF (vec, i + 1);
624 CHECK_NATNUM (val);
625 c = XFASTINT (val);
76d7b829 626
e9ce014c
KH
627 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
628 continue;
76d7b829 629
dcc694d7 630 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 631 {
a2f3eb19
CY
632 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
633 sizeof (struct charset_map_entries));
e9ce014c
KH
634 entries = entries->next;
635 }
636 idx = n_entries % 0x10000;
637 entries->entry[idx].from = from;
638 entries->entry[idx].to = to;
639 entries->entry[idx].c = c;
640 n_entries++;
641 }
76d7b829 642
e9ce014c 643 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 644 SAFE_FREE ();
ac4137cc
KH
645}
646
ecca2aad
KH
647
648/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
649 map it is (see the comment of load_charset_map for the detail). */
650
3263d5a2 651static void
ecca2aad 652load_charset (charset, control_flag)
3263d5a2 653 struct charset *charset;
ecca2aad 654 int control_flag;
76d7b829 655{
ecca2aad 656 Lisp_Object map;
76d7b829 657
ecca2aad
KH
658 if (inhibit_load_charset_map
659 && temp_charset_work
660 && charset == temp_charset_work->current
56f00ed2 661 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
662 return;
663
664 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
665 map = CHARSET_MAP (charset);
666 else if (CHARSET_UNIFIED_P (charset))
667 map = CHARSET_UNIFY_MAP (charset);
668 if (STRINGP (map))
669 load_charset_map_from_file (charset, map, control_flag);
670 else
671 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 672}
76d7b829 673
3263d5a2
KH
674
675DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
676 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
677 (object)
678 Lisp_Object object;
23d2a7f1 679{
3263d5a2 680 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
681}
682
4ed46869 683
ecca2aad
KH
684void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
685 Lisp_Object function, Lisp_Object arg,
686 unsigned from, unsigned to));
687
688void
689map_charset_for_dump (c_function, function, arg, from, to)
690 void (*c_function) (Lisp_Object, Lisp_Object);
691 Lisp_Object function, arg;
692 unsigned from, to;
693{
694 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
695 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
696 Lisp_Object range;
697 int c, stop;
698 struct gcpro gcpro1;
699
700 range = Fcons (Qnil, Qnil);
701 GCPRO1 (range);
702
703 c = temp_charset_work->min_char;
704 stop = (temp_charset_work->max_char < 0x20000
705 ? temp_charset_work->max_char : 0xFFFF);
706
707 while (1)
708 {
709 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
710
711 if (index >= from_idx && index <= to_idx)
712 {
713 if (NILP (XCAR (range)))
714 XSETCAR (range, make_number (c));
715 }
716 else if (! NILP (XCAR (range)))
717 {
718 XSETCDR (range, make_number (c - 1));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
723 XSETCAR (range, Qnil);
724 }
725 if (c == stop)
726 {
727 if (c == temp_charset_work->max_char)
728 {
729 if (! NILP (XCAR (range)))
730 {
731 XSETCDR (range, make_number (c));
732 if (c_function)
733 (*c_function) (arg, range);
734 else
735 call2 (function, range, arg);
736 }
737 break;
738 }
739 c = 0x1FFFF;
740 stop = temp_charset_work->max_char;
741 }
742 c++;
743 }
c542407d 744 UNGCPRO;
ecca2aad
KH
745}
746
4ed46869 747void
374c5cfd
KH
748map_charset_chars (c_function, function, arg,
749 charset, from, to)
750 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
751 Lisp_Object function, arg;
752 struct charset *charset;
753 unsigned from, to;
4ed46869 754{
3263d5a2 755 Lisp_Object range;
374c5cfd 756 int partial;
3263d5a2 757
374c5cfd
KH
758 partial = (from > CHARSET_MIN_CODE (charset)
759 || to < CHARSET_MAX_CODE (charset));
760
3263d5a2 761 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 762 {
374c5cfd
KH
763 int from_idx = CODE_POINT_TO_INDEX (charset, from);
764 int to_idx = CODE_POINT_TO_INDEX (charset, to);
765 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
766 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
767
ecca2aad
KH
768 if (CHARSET_UNIFIED_P (charset))
769 {
770 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_DEUNIFIER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
778 }
779
374c5cfd 780 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 781 if (NILP (function))
5af5dd92 782 (*c_function) (arg, range);
3263d5a2
KH
783 else
784 call2 (function, range, arg);
c83ef371 785 }
3263d5a2
KH
786 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
787 {
788 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
789 load_charset (charset, 2);
790 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
791 map_char_table_for_charset (c_function, function,
792 CHARSET_ENCODER (charset), arg,
793 partial ? charset : NULL, from, to);
794 else
795 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 796 }
374c5cfd 797 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 798 {
374c5cfd
KH
799 Lisp_Object subset_info;
800 int offset;
801
802 subset_info = CHARSET_SUBSET (charset);
803 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
804 offset = XINT (AREF (subset_info, 3));
805 from -= offset;
806 if (from < XFASTINT (AREF (subset_info, 1)))
807 from = XFASTINT (AREF (subset_info, 1));
808 to -= offset;
809 if (to > XFASTINT (AREF (subset_info, 2)))
810 to = XFASTINT (AREF (subset_info, 2));
811 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 812 }
374c5cfd
KH
813 else /* i.e. CHARSET_METHOD_SUPERSET */
814 {
815 Lisp_Object parents;
4ed46869 816
374c5cfd
KH
817 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
818 parents = XCDR (parents))
bbf12bb3 819 {
374c5cfd
KH
820 int offset;
821 unsigned this_from, this_to;
822
823 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
824 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
825 this_from = from > offset ? from - offset : 0;
826 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
827 if (this_from < CHARSET_MIN_CODE (charset))
828 this_from = CHARSET_MIN_CODE (charset);
829 if (this_to > CHARSET_MAX_CODE (charset))
830 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
831 map_charset_chars (c_function, function, arg, charset,
832 this_from, this_to);
bbf12bb3 833 }
35e623fb 834 }
4ed46869
KH
835}
836
374c5cfd 837DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 838 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 839FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 840argument ARG.
4ed46869 841
374c5cfd
KH
842RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
843characters contained in CHARSET.
4ed46869 844
374c5cfd 845The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 846range of code points (in CHARSET) of target characters. */)
374c5cfd
KH
847 (function, charset, arg, from_code, to_code)
848 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 849{
374c5cfd 850 struct charset *cs;
16fed1fc 851 unsigned from, to;
4ed46869 852
374c5cfd
KH
853 CHECK_CHARSET_GET_CHARSET (charset, cs);
854 if (NILP (from_code))
16fed1fc 855 from = CHARSET_MIN_CODE (cs);
970b7474 856 else
4ed46869 857 {
970b7474
KH
858 CHECK_NATNUM (from_code);
859 from = XINT (from_code);
860 if (from < CHARSET_MIN_CODE (cs))
861 from = CHARSET_MIN_CODE (cs);
4ed46869 862 }
374c5cfd 863 if (NILP (to_code))
970b7474 864 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
865 else
866 {
970b7474
KH
867 CHECK_NATNUM (to_code);
868 to = XINT (to_code);
869 if (to > CHARSET_MAX_CODE (cs))
870 to = CHARSET_MAX_CODE (cs);
4ed46869 871 }
16fed1fc 872 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 873 return Qnil;
35e623fb 874}
4ed46869 875
4ed46869 876
3263d5a2
KH
877/* Define a charset according to the arguments. The Nth argument is
878 the Nth attribute of the charset (the last attribute `charset-id'
879 is not included). See the docstring of `define-charset' for the
880 detail. */
4ed46869 881
3263d5a2
KH
882DEFUN ("define-charset-internal", Fdefine_charset_internal,
883 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
884 doc: /* For internal use only.
885usage: (define-charset-internal ...) */)
3263d5a2
KH
886 (nargs, args)
887 int nargs;
888 Lisp_Object *args;
4ed46869 889{
3263d5a2
KH
890 /* Charset attr vector. */
891 Lisp_Object attrs;
892 Lisp_Object val;
893 unsigned hash_code;
894 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 895 int i, j;
3263d5a2
KH
896 struct charset charset;
897 int id;
898 int dimension;
899 int new_definition_p;
900 int nchars;
901
902 if (nargs != charset_arg_max)
903 return Fsignal (Qwrong_number_of_arguments,
904 Fcons (intern ("define-charset-internal"),
905 make_number (nargs)));
906
907 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
908
909 CHECK_SYMBOL (args[charset_arg_name]);
910 ASET (attrs, charset_name, args[charset_arg_name]);
911
912 val = args[charset_arg_code_space];
913 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 914 {
3263d5a2
KH
915 int min_byte, max_byte;
916
917 min_byte = XINT (Faref (val, make_number (i * 2)));
918 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
919 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
920 error ("Invalid :code-space value");
921 charset.code_space[i * 4] = min_byte;
922 charset.code_space[i * 4 + 1] = max_byte;
923 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
924 nchars *= charset.code_space[i * 4 + 2];
925 charset.code_space[i * 4 + 3] = nchars;
926 if (max_byte > 0)
927 dimension = i + 1;
928 }
4ed46869 929
3263d5a2
KH
930 val = args[charset_arg_dimension];
931 if (NILP (val))
932 charset.dimension = dimension;
933 else
4ed46869 934 {
3263d5a2
KH
935 CHECK_NATNUM (val);
936 charset.dimension = XINT (val);
937 if (charset.dimension < 1 || charset.dimension > 4)
938 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
939 }
940
3263d5a2
KH
941 charset.code_linear_p
942 = (charset.dimension == 1
943 || (charset.code_space[2] == 256
944 && (charset.dimension == 2
945 || (charset.code_space[6] == 256
946 && (charset.dimension == 3
947 || charset.code_space[10] == 256)))));
948
69f8de5b 949 if (! charset.code_linear_p)
4ed46869 950 {
69f8de5b 951 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 952 bzero (charset.code_space_mask, 256);
69f8de5b
KH
953 for (i = 0; i < 4; i++)
954 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
955 j++)
956 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
957 }
958
3263d5a2 959 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 960
3263d5a2
KH
961 charset.min_code = (charset.code_space[0]
962 | (charset.code_space[4] << 8)
963 | (charset.code_space[8] << 16)
964 | (charset.code_space[12] << 24));
965 charset.max_code = (charset.code_space[1]
966 | (charset.code_space[5] << 8)
967 | (charset.code_space[9] << 16)
968 | (charset.code_space[13] << 24));
820ee249 969 charset.char_index_offset = 0;
8a73a704 970
820ee249
KH
971 val = args[charset_arg_min_code];
972 if (! NILP (val))
973 {
974 unsigned code;
fdb82f93 975
820ee249
KH
976 if (INTEGERP (val))
977 code = XINT (val);
978 else
979 {
980 CHECK_CONS (val);
8f924df7
KH
981 CHECK_NUMBER_CAR (val);
982 CHECK_NUMBER_CDR (val);
820ee249
KH
983 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
984 }
985 if (code < charset.min_code
986 || code > charset.max_code)
987 args_out_of_range_3 (make_number (charset.min_code),
988 make_number (charset.max_code), val);
989 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
990 charset.min_code = code;
991 }
3fac5a51 992
820ee249
KH
993 val = args[charset_arg_max_code];
994 if (! NILP (val))
3fac5a51 995 {
820ee249
KH
996 unsigned code;
997
998 if (INTEGERP (val))
999 code = XINT (val);
1000 else
1001 {
1002 CHECK_CONS (val);
8f924df7
KH
1003 CHECK_NUMBER_CAR (val);
1004 CHECK_NUMBER_CDR (val);
820ee249
KH
1005 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1006 }
1007 if (code < charset.min_code
1008 || code > charset.max_code)
1009 args_out_of_range_3 (make_number (charset.min_code),
1010 make_number (charset.max_code), val);
1011 charset.max_code = code;
3fac5a51 1012 }
3fac5a51 1013
ecca2aad 1014 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 1015
3263d5a2
KH
1016 val = args[charset_arg_invalid_code];
1017 if (NILP (val))
1018 {
1019 if (charset.min_code > 0)
1020 charset.invalid_code = 0;
bbf12bb3
KH
1021 else
1022 {
3263d5a2
KH
1023 XSETINT (val, charset.max_code + 1);
1024 if (XINT (val) == charset.max_code + 1)
1025 charset.invalid_code = charset.max_code + 1;
1026 else
1027 error ("Attribute :invalid-code must be specified");
76d7b829 1028 }
76d7b829 1029 }
3263d5a2
KH
1030 else
1031 {
1032 CHECK_NATNUM (val);
1033 charset.invalid_code = XFASTINT (val);
1034 }
4ed46869 1035
3263d5a2
KH
1036 val = args[charset_arg_iso_final];
1037 if (NILP (val))
1038 charset.iso_final = -1;
1039 else
1040 {
1041 CHECK_NUMBER (val);
1042 if (XINT (val) < '0' || XINT (val) > 127)
1043 error ("Invalid iso-final-char: %d", XINT (val));
1044 charset.iso_final = XINT (val);
1045 }
4ed46869 1046
3263d5a2
KH
1047 val = args[charset_arg_iso_revision];
1048 if (NILP (val))
1049 charset.iso_revision = -1;
1050 else
4ed46869 1051 {
3263d5a2
KH
1052 CHECK_NUMBER (val);
1053 if (XINT (val) > 63)
1054 args_out_of_range (make_number (63), val);
1055 charset.iso_revision = XINT (val);
4ed46869 1056 }
4ed46869 1057
3263d5a2
KH
1058 val = args[charset_arg_emacs_mule_id];
1059 if (NILP (val))
1060 charset.emacs_mule_id = -1;
4ed46869
KH
1061 else
1062 {
3263d5a2
KH
1063 CHECK_NATNUM (val);
1064 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1065 error ("Invalid emacs-mule-id: %d", XINT (val));
1066 charset.emacs_mule_id = XINT (val);
c83ef371 1067 }
f6302ac9 1068
3263d5a2 1069 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1070
3263d5a2 1071 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1072
3263d5a2
KH
1073 charset.unified_p = 0;
1074
1075 bzero (charset.fast_map, sizeof (charset.fast_map));
1076
1077 if (! NILP (args[charset_arg_code_offset]))
1078 {
1079 val = args[charset_arg_code_offset];
1080 CHECK_NUMBER (val);
1081
1082 charset.method = CHARSET_METHOD_OFFSET;
1083 charset.code_offset = XINT (val);
1084
1085 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1086 charset.min_char = i + charset.code_offset;
1087 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1088 charset.max_char = i + charset.code_offset;
1089 if (charset.max_char > MAX_CHAR)
1090 error ("Unsupported max char: %d", charset.max_char);
1091
f148205f
KH
1092 i = (charset.min_char >> 7) << 7;
1093 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1094 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1095 i = (i >> 12) << 12;
3263d5a2
KH
1096 for (; i <= charset.max_char; i += 0x1000)
1097 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1098 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1099 charset.ascii_compatible_p = 1;
3263d5a2
KH
1100 }
1101 else if (! NILP (args[charset_arg_map]))
1102 {
1103 val = args[charset_arg_map];
1104 ASET (attrs, charset_map, val);
ecca2aad 1105 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1106 }
374c5cfd 1107 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1108 {
374c5cfd
KH
1109 Lisp_Object parent;
1110 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1111 struct charset *parent_charset;
1112
1113 val = args[charset_arg_subset];
1114 parent = Fcar (val);
1115 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1116 parent_min_code = Fnth (make_number (1), val);
1117 CHECK_NATNUM (parent_min_code);
1118 parent_max_code = Fnth (make_number (2), val);
1119 CHECK_NATNUM (parent_max_code);
1120 parent_code_offset = Fnth (make_number (3), val);
1121 CHECK_NUMBER (parent_code_offset);
1122 val = Fmake_vector (make_number (4), Qnil);
1123 ASET (val, 0, make_number (parent_charset->id));
1124 ASET (val, 1, parent_min_code);
1125 ASET (val, 2, parent_max_code);
1126 ASET (val, 3, parent_code_offset);
1127 ASET (attrs, charset_subset, val);
1128
1129 charset.method = CHARSET_METHOD_SUBSET;
1130 /* Here, we just copy the parent's fast_map. It's not accurate,
1131 but at least it works for quickly detecting which character
1132 DOESN'T belong to this charset. */
1133 for (i = 0; i < 190; i++)
1134 charset.fast_map[i] = parent_charset->fast_map[i];
1135
1136 /* We also copy these for parents. */
1137 charset.min_char = parent_charset->min_char;
1138 charset.max_char = parent_charset->max_char;
1139 }
1140 else if (! NILP (args[charset_arg_superset]))
0282eb69 1141 {
374c5cfd
KH
1142 val = args[charset_arg_superset];
1143 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1144 val = Fcopy_sequence (val);
374c5cfd 1145 ASET (attrs, charset_superset, val);
3263d5a2
KH
1146
1147 charset.min_char = MAX_CHAR;
1148 charset.max_char = 0;
1149 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1150 {
3263d5a2
KH
1151 Lisp_Object elt, car_part, cdr_part;
1152 int this_id, offset;
1153 struct charset *this_charset;
2e344af3 1154
3263d5a2
KH
1155 elt = Fcar (val);
1156 if (CONSP (elt))
2e344af3 1157 {
3263d5a2
KH
1158 car_part = XCAR (elt);
1159 cdr_part = XCDR (elt);
1160 CHECK_CHARSET_GET_ID (car_part, this_id);
1161 CHECK_NUMBER (cdr_part);
1162 offset = XINT (cdr_part);
177c0ea7 1163 }
3263d5a2 1164 else
4ed46869 1165 {
3263d5a2
KH
1166 CHECK_CHARSET_GET_ID (elt, this_id);
1167 offset = 0;
4ed46869 1168 }
3263d5a2
KH
1169 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1170
1171 this_charset = CHARSET_FROM_ID (this_id);
1172 if (charset.min_char > this_charset->min_char)
1173 charset.min_char = this_charset->min_char;
1174 if (charset.max_char < this_charset->max_char)
1175 charset.max_char = this_charset->max_char;
1176 for (i = 0; i < 190; i++)
1177 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1178 }
0282eb69 1179 }
2e344af3 1180 else
3263d5a2 1181 error ("None of :code-offset, :map, :parents are specified");
05505664 1182
3263d5a2
KH
1183 val = args[charset_arg_unify_map];
1184 if (! NILP (val) && !STRINGP (val))
1185 CHECK_VECTOR (val);
1186 ASET (attrs, charset_unify_map, val);
05505664 1187
3263d5a2
KH
1188 CHECK_LIST (args[charset_arg_plist]);
1189 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1190
3263d5a2
KH
1191 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1192 &hash_code);
1193 if (charset.hash_index >= 0)
1194 {
1195 new_definition_p = 0;
4f65af01 1196 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1197 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1198 }
1a45ff10 1199 else
3263d5a2
KH
1200 {
1201 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1202 hash_code);
1203 if (charset_table_used == charset_table_size)
1204 {
2fe1edd1
KH
1205 struct charset *new_table
1206 = (struct charset *) xmalloc (sizeof (struct charset)
1207 * (charset_table_size + 16));
1208 bcopy (charset_table, new_table,
1209 sizeof (struct charset) * charset_table_size);
1210 charset_table_size += 16;
1211 charset_table = new_table;
3263d5a2
KH
1212 }
1213 id = charset_table_used++;
3263d5a2
KH
1214 new_definition_p = 1;
1215 }
2e344af3 1216
4f65af01 1217 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1218 charset.id = id;
1219 charset_table[id] = charset;
2e344af3 1220
ecca2aad 1221 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1222 {
1223 load_charset (&charset, 0);
1224 charset_table[id] = charset;
1225 }
ecca2aad 1226
3263d5a2 1227 if (charset.iso_final >= 0)
4ed46869 1228 {
3263d5a2
KH
1229 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1230 charset.iso_final) = id;
1231 if (new_definition_p)
1232 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1233 Fcons (make_number (id), Qnil));
7c7dceee
KH
1234 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1235 charset_jisx0201_roman = id;
1236 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1237 charset_jisx0208_1978 = id;
1238 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1239 charset_jisx0208 = id;
d32320c4
KH
1240 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1241 charset_ksc5601 = id;
4ed46869 1242 }
d0cf2d48 1243
3263d5a2 1244 if (charset.emacs_mule_id >= 0)
4ed46869 1245 {
3263d5a2 1246 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1247 if (charset.emacs_mule_id < 0xA0)
1248 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1249 else
1250 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1251 if (new_definition_p)
1252 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1253 Fcons (make_number (id), Qnil));
4ed46869
KH
1254 }
1255
3263d5a2
KH
1256 if (new_definition_p)
1257 {
1258 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1259 if (charset.supplementary_p)
1260 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1261 Fcons (make_number (id), Qnil));
1262 else
880820fe
KH
1263 {
1264 Lisp_Object tail;
1265
1266 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1267 {
1268 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1269
1270 if (cs->supplementary_p)
1271 break;
1272 }
1273 if (EQ (tail, Vcharset_ordered_list))
1274 Vcharset_ordered_list = Fcons (make_number (id),
1275 Vcharset_ordered_list);
1276 else if (NILP (tail))
1277 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1278 Fcons (make_number (id), Qnil));
1279 else
1280 {
1281 val = Fcons (XCAR (tail), XCDR (tail));
1282 XSETCDR (tail, val);
1283 XSETCAR (tail, make_number (id));
1284 }
1285 }
dbbb237d 1286 charset_ordered_list_tick++;
3263d5a2 1287 }
4ed46869 1288
3263d5a2 1289 return Qnil;
4ed46869
KH
1290}
1291
2fe1edd1
KH
1292
1293/* Same as Fdefine_charset_internal but arguments are more convenient
1294 to call from C (typically in syms_of_charset). This can define a
1295 charset of `offset' method only. Return the ID of the new
1296 charset. */
1297
1298static int
1299define_charset_internal (name, dimension, code_space, min_code, max_code,
1300 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1301 ascii_compatible, supplementary,
2fe1edd1
KH
1302 code_offset)
1303 Lisp_Object name;
1304 int dimension;
1305 unsigned char *code_space;
1306 unsigned min_code, max_code;
1307 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1308 int ascii_compatible, supplementary;
2fe1edd1
KH
1309 int code_offset;
1310{
1311 Lisp_Object args[charset_arg_max];
1312 Lisp_Object plist[14];
1313 Lisp_Object val;
1314 int i;
1315
1316 args[charset_arg_name] = name;
1317 args[charset_arg_dimension] = make_number (dimension);
1318 val = Fmake_vector (make_number (8), make_number (0));
1319 for (i = 0; i < 8; i++)
1320 ASET (val, i, make_number (code_space[i]));
1321 args[charset_arg_code_space] = val;
1322 args[charset_arg_min_code] = make_number (min_code);
1323 args[charset_arg_max_code] = make_number (max_code);
1324 args[charset_arg_iso_final]
1325 = (iso_final < 0 ? Qnil : make_number (iso_final));
1326 args[charset_arg_iso_revision] = make_number (iso_revision);
1327 args[charset_arg_emacs_mule_id]
1328 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1329 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1330 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1331 args[charset_arg_invalid_code] = Qnil;
1332 args[charset_arg_code_offset] = make_number (code_offset);
1333 args[charset_arg_map] = Qnil;
1334 args[charset_arg_subset] = Qnil;
1335 args[charset_arg_superset] = Qnil;
1336 args[charset_arg_unify_map] = Qnil;
1337
d67b4f80 1338 plist[0] = intern_c_string (":name");
2fe1edd1 1339 plist[1] = args[charset_arg_name];
d67b4f80 1340 plist[2] = intern_c_string (":dimension");
2fe1edd1 1341 plist[3] = args[charset_arg_dimension];
d67b4f80 1342 plist[4] = intern_c_string (":code-space");
2fe1edd1 1343 plist[5] = args[charset_arg_code_space];
d67b4f80 1344 plist[6] = intern_c_string (":iso-final-char");
2fe1edd1 1345 plist[7] = args[charset_arg_iso_final];
d67b4f80 1346 plist[8] = intern_c_string (":emacs-mule-id");
2fe1edd1 1347 plist[9] = args[charset_arg_emacs_mule_id];
d67b4f80 1348 plist[10] = intern_c_string (":ascii-compatible-p");
2fe1edd1 1349 plist[11] = args[charset_arg_ascii_compatible_p];
d67b4f80 1350 plist[12] = intern_c_string (":code-offset");
2fe1edd1
KH
1351 plist[13] = args[charset_arg_code_offset];
1352
1353 args[charset_arg_plist] = Flist (14, plist);
1354 Fdefine_charset_internal (charset_arg_max, args);
1355
1356 return XINT (CHARSET_SYMBOL_ID (name));
1357}
1358
1359
3263d5a2
KH
1360DEFUN ("define-charset-alias", Fdefine_charset_alias,
1361 Sdefine_charset_alias, 2, 2, 0,
1362 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1363 (alias, charset)
1364 Lisp_Object alias, charset;
4ed46869 1365{
3263d5a2
KH
1366 Lisp_Object attr;
1367
1368 CHECK_CHARSET_GET_ATTR (charset, attr);
1369 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1370 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1371 return Qnil;
1372}
4ed46869 1373
4ed46869 1374
3263d5a2 1375DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1376 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1377 (charset)
1378 Lisp_Object charset;
1379{
1380 Lisp_Object attrs;
1381
1382 CHECK_CHARSET_GET_ATTR (charset, attrs);
1383 return CHARSET_ATTR_PLIST (attrs);
1384}
1385
1386
1387DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1388 doc: /* Set CHARSET's property list to PLIST. */)
1389 (charset, plist)
1390 Lisp_Object charset, plist;
1391{
1392 Lisp_Object attrs;
1393
1394 CHECK_CHARSET_GET_ATTR (charset, attrs);
1395 CHARSET_ATTR_PLIST (attrs) = plist;
1396 return plist;
1397}
1398
1399
dbbb237d 1400DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1401 doc: /* Unify characters of CHARSET with Unicode.
1402This means reading the relevant file and installing the table defined
dbbb237d
KH
1403by CHARSET's `:unify-map' property.
1404
64165ae2
DL
1405Optional second arg UNIFY-MAP is a file name string or a vector. It has
1406the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1407`define-charset' (which see).
1408
1409Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1410 (charset, unify_map, deunify)
1411 Lisp_Object charset, unify_map, deunify;
8a73a704 1412{
3263d5a2
KH
1413 int id;
1414 struct charset *cs;
8f924df7 1415
3263d5a2
KH
1416 CHECK_CHARSET_GET_ID (charset, id);
1417 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1418 if (NILP (deunify)
1419 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1420 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1421 return Qnil;
dbbb237d 1422
3263d5a2 1423 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1424 if (NILP (deunify))
1425 {
ecca2aad
KH
1426 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1427 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1428 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1429 if (NILP (unify_map))
1430 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1431 else
ecca2aad
KH
1432 {
1433 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1434 signal_error ("Bad unify-map", unify_map);
1435 CHARSET_UNIFY_MAP (cs) = unify_map;
1436 }
1437 if (NILP (Vchar_unify_table))
1438 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1439 char_table_set_range (Vchar_unify_table,
1440 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1441 CHARSET_UNIFIED_P (cs) = 1;
1442 }
1443 else if (CHAR_TABLE_P (Vchar_unify_table))
1444 {
1445 int min_code = CHARSET_MIN_CODE (cs);
1446 int max_code = CHARSET_MAX_CODE (cs);
1447 int min_char = DECODE_CHAR (cs, min_code);
1448 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1449
dbbb237d
KH
1450 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1451 }
8f924df7 1452
3263d5a2 1453 return Qnil;
8a73a704
KH
1454}
1455
3fac5a51
KH
1456DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1457 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1458 doc: /*
d0cf2d48 1459Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1460DIMENSION is the number of bytes to represent a character: 1 or 2.
1461CHARS is the number of characters in a dimension: 94 or 96.
1462
1463This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1464If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1465return nil. */)
1466 (dimension, chars)
3fac5a51
KH
1467 Lisp_Object dimension, chars;
1468{
1469 int final_char;
1470
b7826503
PJ
1471 CHECK_NUMBER (dimension);
1472 CHECK_NUMBER (chars);
3263d5a2
KH
1473 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1474 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1475 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1476 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1477 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1478 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1479 break;
3fac5a51
KH
1480 return (final_char <= '?' ? make_number (final_char) : Qnil);
1481}
1482
3263d5a2
KH
1483static void
1484check_iso_charset_parameter (dimension, chars, final_char)
1485 Lisp_Object dimension, chars, final_char;
4ed46869 1486{
3263d5a2
KH
1487 CHECK_NATNUM (dimension);
1488 CHECK_NATNUM (chars);
1489 CHECK_NATNUM (final_char);
4ed46869 1490
3263d5a2
KH
1491 if (XINT (dimension) > 3)
1492 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1493 if (XINT (chars) != 94 && XINT (chars) != 96)
1494 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1495 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1496 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1497}
1498
1499
1500DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1501 4, 4, 0,
cefd8c4f
KH
1502 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1503
1504On decoding by an ISO-2022 base coding system, when a charset
1505specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1506if CHARSET is designated instead. */)
3263d5a2
KH
1507 (dimension, chars, final_char, charset)
1508 Lisp_Object dimension, chars, final_char, charset;
1509{
1510 int id;
82215ce9 1511 int chars_flag;
4ed46869 1512
3263d5a2
KH
1513 CHECK_CHARSET_GET_ID (charset, id);
1514 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1515 chars_flag = XINT (chars) == 96;
1516 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1517 return Qnil;
1518}
1519
3263d5a2 1520
2e344af3
KH
1521/* Return information about charsets in the text at PTR of NBYTES
1522 bytes, which are NCHARS characters. The value is:
f6302ac9 1523
cfe34140 1524 0: Each character is represented by one byte. This is always
3263d5a2
KH
1525 true for a unibyte string. For a multibyte string, true if
1526 it contains only ASCII characters.
1527
28c026cd
DL
1528 1: No charsets other than ascii, control-1, and latin-1 are
1529 found.
1d67c29b 1530
3263d5a2
KH
1531 2: Otherwise.
1532*/
4ed46869
KH
1533
1534int
3263d5a2
KH
1535string_xstring_p (string)
1536 Lisp_Object string;
4ed46869 1537{
8f924df7
KH
1538 const unsigned char *p = SDATA (string);
1539 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1540
8f924df7 1541 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1542 return 0;
1543
3263d5a2 1544 while (p < endp)
0282eb69 1545 {
3263d5a2 1546 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1547
3cc67a4d 1548 if (c >= 0x100)
3263d5a2 1549 return 2;
0282eb69 1550 }
3263d5a2
KH
1551 return 1;
1552}
05505664 1553
05505664 1554
3263d5a2 1555/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1556
3cc67a4d
KH
1557 CHARSETS is a vector. If Nth element is non-nil, it means the
1558 charset whose id is N is already found.
2e344af3 1559
3263d5a2 1560 It may lookup a translation table TABLE if supplied. */
2e344af3 1561
3263d5a2 1562static void
3cc67a4d 1563find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
dbbb237d 1564 const unsigned char *ptr;
42ca828e 1565 EMACS_INT nchars, nbytes;
3263d5a2 1566 Lisp_Object charsets, table;
3cc67a4d 1567 int multibyte;
3263d5a2 1568{
dbbb237d 1569 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1570
1571 if (nchars == nbytes)
3263d5a2 1572 {
3cc67a4d
KH
1573 if (multibyte)
1574 ASET (charsets, charset_ascii, Qt);
1575 else
1576 while (ptr < pend)
1577 {
1578 int c = *ptr++;
1579
1580 if (!NILP (table))
1581 c = translate_char (table, c);
1582 if (ASCII_BYTE_P (c))
1583 ASET (charsets, charset_ascii, Qt);
1584 else
1585 ASET (charsets, charset_eight_bit, Qt);
1586 }
1587 }
1588 else
1589 {
1590 while (ptr < pend)
3263d5a2 1591 {
3cc67a4d
KH
1592 int c = STRING_CHAR_ADVANCE (ptr);
1593 struct charset *charset;
3263d5a2 1594
3cc67a4d
KH
1595 if (!NILP (table))
1596 c = translate_char (table, c);
1597 charset = CHAR_CHARSET (c);
1598 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1599 }
4ed46869 1600 }
4ed46869
KH
1601}
1602
1603DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1604 2, 3, 0,
fdb82f93
PJ
1605 doc: /* Return a list of charsets in the region between BEG and END.
1606BEG and END are buffer positions.
1607Optional arg TABLE if non-nil is a translation table to look up.
1608
fdb82f93
PJ
1609If the current buffer is unibyte, the returned list may contain
1610only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1611 (beg, end, table)
23d2a7f1 1612 Lisp_Object beg, end, table;
4ed46869 1613{
3263d5a2 1614 Lisp_Object charsets;
42ca828e
DL
1615 EMACS_INT from, from_byte, to, stop, stop_byte;
1616 int i;
4ed46869 1617 Lisp_Object val;
3cc67a4d 1618 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1619
1620 validate_region (&beg, &end);
1621 from = XFASTINT (beg);
1622 stop = to = XFASTINT (end);
6ae1f27e 1623
4ed46869 1624 if (from < GPT && GPT < to)
6ae1f27e
RS
1625 {
1626 stop = GPT;
1627 stop_byte = GPT_BYTE;
1628 }
1629 else
1630 stop_byte = CHAR_TO_BYTE (stop);
1631
1632 from_byte = CHAR_TO_BYTE (from);
1633
3263d5a2 1634 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1635 while (1)
1636 {
3263d5a2 1637 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1638 stop_byte - from_byte, charsets, table,
1639 multibyte);
4ed46869 1640 if (stop < to)
6ae1f27e
RS
1641 {
1642 from = stop, from_byte = stop_byte;
1643 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1644 }
4ed46869
KH
1645 else
1646 break;
1647 }
6ae1f27e 1648
4ed46869 1649 val = Qnil;
3263d5a2 1650 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1651 if (!NILP (AREF (charsets, i)))
3263d5a2 1652 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1653 return val;
1654}
1655
1656DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1657 1, 2, 0,
fdb82f93
PJ
1658 doc: /* Return a list of charsets in STR.
1659Optional arg TABLE if non-nil is a translation table to look up.
1660
fdb82f93 1661If STR is unibyte, the returned list may contain
3263d5a2 1662only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1663 (str, table)
23d2a7f1 1664 Lisp_Object str, table;
4ed46869 1665{
3263d5a2 1666 Lisp_Object charsets;
4ed46869
KH
1667 int i;
1668 Lisp_Object val;
1669
b7826503 1670 CHECK_STRING (str);
87b089ad 1671
3263d5a2 1672 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1673 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1674 charsets, table,
1675 STRING_MULTIBYTE (str));
4ed46869 1676 val = Qnil;
3263d5a2 1677 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1678 if (!NILP (AREF (charsets, i)))
3263d5a2 1679 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1680 return val;
1681}
2e344af3 1682
4ed46869 1683\f
3263d5a2 1684
ecca2aad
KH
1685/* Return a unified character code for C (>= 0x110000). VAL is a
1686 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1687 charset symbol. */
1688int
1689maybe_unify_char (c, val)
1690 int c;
1691 Lisp_Object val;
1692{
1693 struct charset *charset;
1694
1695 if (INTEGERP (val))
1696 return XINT (val);
1697 if (NILP (val))
1698 return c;
1699
1700 CHECK_CHARSET_GET_CHARSET (val, charset);
1701 load_charset (charset, 1);
1702 if (! inhibit_load_charset_map)
1703 {
1704 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1705 if (! NILP (val))
1706 c = XINT (val);
1707 }
1708 else
1709 {
1710 int code_index = c - CHARSET_CODE_OFFSET (charset);
1711 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1712
1713 if (unified > 0)
1714 c = unified;
1715 }
1716 return c;
1717}
1718
1719
3263d5a2
KH
1720/* Return a character correponding to the code-point CODE of
1721 CHARSET. */
1722
1723int
1724decode_char (charset, code)
1725 struct charset *charset;
1726 unsigned code;
4ed46869 1727{
3263d5a2
KH
1728 int c, char_index;
1729 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1730
3263d5a2
KH
1731 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1732 return -1;
4ed46869 1733
374c5cfd 1734 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1735 {
374c5cfd
KH
1736 Lisp_Object subset_info;
1737
1738 subset_info = CHARSET_SUBSET (charset);
1739 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1740 code -= XINT (AREF (subset_info, 3));
1741 if (code < XFASTINT (AREF (subset_info, 1))
1742 || code > XFASTINT (AREF (subset_info, 2)))
1743 c = -1;
1744 else
1745 c = DECODE_CHAR (charset, code);
2e344af3 1746 }
374c5cfd 1747 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1748 {
3263d5a2 1749 Lisp_Object parents;
4ed46869 1750
374c5cfd 1751 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1752 c = -1;
1753 for (; CONSP (parents); parents = XCDR (parents))
1754 {
1755 int id = XINT (XCAR (XCAR (parents)));
1756 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1757 unsigned this_code = code - code_offset;
4ed46869 1758
3263d5a2
KH
1759 charset = CHARSET_FROM_ID (id);
1760 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1761 break;
1762 }
1763 }
1764 else
ac4137cc 1765 {
3263d5a2 1766 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1767 if (char_index < 0)
1768 return -1;
4ed46869 1769
3263d5a2 1770 if (method == CHARSET_METHOD_MAP)
ac4137cc 1771 {
3263d5a2 1772 Lisp_Object decoder;
4ed46869 1773
3263d5a2
KH
1774 decoder = CHARSET_DECODER (charset);
1775 if (! VECTORP (decoder))
ecca2aad
KH
1776 {
1777 load_charset (charset, 1);
1778 decoder = CHARSET_DECODER (charset);
1779 }
1780 if (VECTORP (decoder))
1781 c = XINT (AREF (decoder, char_index));
1782 else
1783 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1784 }
ecca2aad 1785 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1786 {
3263d5a2 1787 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1788 if (CHARSET_UNIFIED_P (charset)
1789 && c > MAX_UNICODE_CHAR)
1790 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1791 }
1792 }
4ed46869 1793
3263d5a2 1794 return c;
90d7b74e
KH
1795}
1796
374c5cfd
KH
1797/* Variable used temporarily by the macro ENCODE_CHAR. */
1798Lisp_Object charset_work;
4ed46869 1799
3263d5a2 1800/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1801 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1802 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1803
3263d5a2
KH
1804unsigned
1805encode_char (charset, c)
1806 struct charset *charset;
9b6a601f 1807 int c;
9d3d8cba 1808{
3263d5a2
KH
1809 unsigned code;
1810 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1811
3263d5a2 1812 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1813 {
6809ca75 1814 Lisp_Object deunifier;
ecca2aad 1815 int code_index = -1;
4ed46869 1816
3263d5a2
KH
1817 deunifier = CHARSET_DEUNIFIER (charset);
1818 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1819 {
ecca2aad 1820 load_charset (charset, 2);
3263d5a2 1821 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1822 }
ecca2aad
KH
1823 if (CHAR_TABLE_P (deunifier))
1824 {
1825 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1826
1827 if (INTEGERP (deunified))
1828 code_index = XINT (deunified);
1829 }
1830 else
1831 {
1832 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1833 }
1834 if (code_index >= 0)
1835 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1836 }
9d3d8cba 1837
374c5cfd
KH
1838 if (method == CHARSET_METHOD_SUBSET)
1839 {
1840 Lisp_Object subset_info;
1841 struct charset *this_charset;
1842
1843 subset_info = CHARSET_SUBSET (charset);
1844 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1845 code = ENCODE_CHAR (this_charset, c);
1846 if (code == CHARSET_INVALID_CODE (this_charset)
1847 || code < XFASTINT (AREF (subset_info, 1))
1848 || code > XFASTINT (AREF (subset_info, 2)))
1849 return CHARSET_INVALID_CODE (charset);
1850 code += XINT (AREF (subset_info, 3));
1851 return code;
1852 }
9d3d8cba 1853
374c5cfd 1854 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1855 {
3263d5a2 1856 Lisp_Object parents;
d2665018 1857
374c5cfd 1858 parents = CHARSET_SUPERSET (charset);
3263d5a2 1859 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1860 {
3263d5a2
KH
1861 int id = XINT (XCAR (XCAR (parents)));
1862 int code_offset = XINT (XCDR (XCAR (parents)));
1863 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1864
3263d5a2 1865 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1866 if (code != CHARSET_INVALID_CODE (this_charset))
1867 return code + code_offset;
beeedaad 1868 }
3263d5a2
KH
1869 return CHARSET_INVALID_CODE (charset);
1870 }
1bcc1567 1871
15c85a88
KH
1872 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1873 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1874 return CHARSET_INVALID_CODE (charset);
1bcc1567 1875
3263d5a2 1876 if (method == CHARSET_METHOD_MAP)
3f62427c 1877 {
3263d5a2 1878 Lisp_Object encoder;
beeedaad 1879 Lisp_Object val;
9b6a601f 1880
3263d5a2
KH
1881 encoder = CHARSET_ENCODER (charset);
1882 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1883 {
1884 load_charset (charset, 2);
1885 encoder = CHARSET_ENCODER (charset);
1886 }
1887 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1888 {
1889 val = CHAR_TABLE_REF (encoder, c);
1890 if (NILP (val))
1891 return CHARSET_INVALID_CODE (charset);
1892 code = XINT (val);
1893 if (! CHARSET_COMPACT_CODES_P (charset))
1894 code = INDEX_TO_CODE_POINT (charset, code);
1895 }
1896 else
1897 {
1898 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1899 code = INDEX_TO_CODE_POINT (charset, code);
1900 }
3263d5a2 1901 }
820ee249 1902 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1903 {
ecca2aad
KH
1904 int code_index = c - CHARSET_CODE_OFFSET (charset);
1905
1906 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1907 }
8ac5a9cc 1908
3263d5a2 1909 return code;
4ed46869
KH
1910}
1911
4ed46869 1912
3263d5a2
KH
1913DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1914 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1915Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1916
3263d5a2 1917CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1918
3263d5a2 1919Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1920and CODE-POINT to a character. Currently not supported and just ignored. */)
3263d5a2
KH
1921 (charset, code_point, restriction)
1922 Lisp_Object charset, code_point, restriction;
4ed46869 1923{
3263d5a2
KH
1924 int c, id;
1925 unsigned code;
1926 struct charset *charsetp;
859f2b3c 1927
3263d5a2
KH
1928 CHECK_CHARSET_GET_ID (charset, id);
1929 if (CONSP (code_point))
1930 {
8f924df7
KH
1931 CHECK_NATNUM_CAR (code_point);
1932 CHECK_NATNUM_CDR (code_point);
69f8de5b 1933 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1934 }
4ed46869
KH
1935 else
1936 {
3263d5a2
KH
1937 CHECK_NATNUM (code_point);
1938 code = XINT (code_point);
4ed46869 1939 }
3263d5a2
KH
1940 charsetp = CHARSET_FROM_ID (id);
1941 c = DECODE_CHAR (charsetp, code);
1942 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1943}
1944
859f2b3c 1945
3263d5a2
KH
1946DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1947 doc: /* Encode the character CH into a code-point of CHARSET.
1948Return nil if CHARSET doesn't include CH.
17e7ef1b 1949
d0cf2d48 1950Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2
KH
1951code-point in CCS. Currently not supported and just ignored. */)
1952 (ch, charset, restriction)
1953 Lisp_Object ch, charset, restriction;
4ed46869 1954{
16fed1fc 1955 int id;
3263d5a2
KH
1956 unsigned code;
1957 struct charset *charsetp;
046b1f03 1958
3263d5a2
KH
1959 CHECK_CHARSET_GET_ID (charset, id);
1960 CHECK_NATNUM (ch);
3263d5a2 1961 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1962 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1963 if (code == CHARSET_INVALID_CODE (charsetp))
1964 return Qnil;
1965 if (code > 0x7FFFFFF)
1966 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1967 return make_number (code);
beeedaad
KH
1968}
1969
beeedaad 1970
b121a744
KH
1971DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1972 doc:
1973 /* Return a character of CHARSET whose position codes are CODEn.
1974
1975CODE1 through CODE4 are optional, but if you don't supply sufficient
1976position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1977is specified. */)
b121a744
KH
1978 (charset, code1, code2, code3, code4)
1979 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1980{
3263d5a2
KH
1981 int id, dimension;
1982 struct charset *charsetp;
b121a744
KH
1983 unsigned code;
1984 int c;
87b089ad 1985
3263d5a2
KH
1986 CHECK_CHARSET_GET_ID (charset, id);
1987 charsetp = CHARSET_FROM_ID (id);
4ed46869 1988
b121a744
KH
1989 dimension = CHARSET_DIMENSION (charsetp);
1990 if (NILP (code1))
d47073ca
KH
1991 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1992 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1993 else
859f2b3c 1994 {
b121a744
KH
1995 CHECK_NATNUM (code1);
1996 if (XFASTINT (code1) >= 0x100)
1997 args_out_of_range (make_number (0xFF), code1);
1998 code = XFASTINT (code1);
859f2b3c 1999
b0a1e45e 2000 if (dimension > 1)
beeedaad 2001 {
b121a744 2002 code <<= 8;
b0a1e45e
KH
2003 if (NILP (code2))
2004 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 2005 else
b121a744 2006 {
b0a1e45e
KH
2007 CHECK_NATNUM (code2);
2008 if (XFASTINT (code2) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code2);
2010 code |= XFASTINT (code2);
b121a744 2011 }
99529c2c 2012
b0a1e45e 2013 if (dimension > 2)
b121a744
KH
2014 {
2015 code <<= 8;
b0a1e45e
KH
2016 if (NILP (code3))
2017 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
2018 else
2019 {
b0a1e45e
KH
2020 CHECK_NATNUM (code3);
2021 if (XFASTINT (code3) >= 0x100)
2022 args_out_of_range (make_number (0xFF), code3);
2023 code |= XFASTINT (code3);
2024 }
2025
2026 if (dimension > 3)
2027 {
2028 code <<= 8;
2029 if (NILP (code4))
2030 code |= charsetp->code_space[0];
2031 else
2032 {
2033 CHECK_NATNUM (code4);
2034 if (XFASTINT (code4) >= 0x100)
2035 args_out_of_range (make_number (0xFF), code4);
2036 code |= XFASTINT (code4);
2037 }
b121a744
KH
2038 }
2039 }
beeedaad 2040 }
859f2b3c 2041 }
beeedaad 2042
b121a744
KH
2043 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2044 code &= 0x7F7F7F7F;
2045 c = DECODE_CHAR (charsetp, code);
2046 if (c < 0)
2047 error ("Invalid code(s)");
3263d5a2 2048 return make_number (c);
4ed46869
KH
2049}
2050
beeedaad 2051
3263d5a2
KH
2052/* Return the first charset in CHARSET_LIST that contains C.
2053 CHARSET_LIST is a list of charset IDs. If it is nil, use
2054 Vcharset_ordered_list. */
beeedaad 2055
3263d5a2
KH
2056struct charset *
2057char_charset (c, charset_list, code_return)
2058 int c;
2059 Lisp_Object charset_list;
2060 unsigned *code_return;
2e344af3 2061{
8a1816bb
KH
2062 int maybe_null = 0;
2063
3263d5a2
KH
2064 if (NILP (charset_list))
2065 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2066 else
2067 maybe_null = 1;
beeedaad 2068
6c652beb 2069 while (CONSP (charset_list))
2e344af3 2070 {
3263d5a2
KH
2071 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2072 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2073
3263d5a2 2074 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2075 {
3263d5a2
KH
2076 if (code_return)
2077 *code_return = code;
2078 return charset;
3f62427c 2079 }
3263d5a2 2080 charset_list = XCDR (charset_list);
6c652beb
KH
2081 if (c <= MAX_UNICODE_CHAR
2082 && EQ (charset_list, Vcharset_non_preferred_head))
2083 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2084 }
8a1816bb
KH
2085 return (maybe_null ? NULL
2086 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2087 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2088}
2089
2e344af3 2090
3263d5a2 2091DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2092 doc:
d0cf2d48 2093 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2094The charset is decided by the current priority order of charsets.
2095A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2096CH in the charset. */)
3263d5a2
KH
2097 (ch)
2098 Lisp_Object ch;
4ed46869 2099{
3263d5a2
KH
2100 struct charset *charset;
2101 int c, dimension;
2102 unsigned code;
4ed46869
KH
2103 Lisp_Object val;
2104
3263d5a2
KH
2105 CHECK_CHARACTER (ch);
2106 c = XFASTINT (ch);
2107 charset = CHAR_CHARSET (c);
2108 if (! charset)
3cc67a4d 2109 abort ();
3263d5a2
KH
2110 code = ENCODE_CHAR (charset, c);
2111 if (code == CHARSET_INVALID_CODE (charset))
2112 abort ();
2113 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2114 for (val = Qnil; dimension > 0; dimension--)
2115 {
2116 val = Fcons (make_number (code & 0xFF), val);
2117 code >>= 8;
2118 }
3263d5a2 2119 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2120}
2121
740f080d 2122
4cb75c4b
KH
2123DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2124 doc: /* Return the charset of highest priority that contains CH.
2125If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2126from which to find the charset. It may also be a coding system. In
2127that case, find the charset from what supported by that coding system. */)
2128 (ch, restriction)
2129 Lisp_Object ch, restriction;
4ed46869 2130{
3263d5a2 2131 struct charset *charset;
4ed46869 2132
3263d5a2 2133 CHECK_CHARACTER (ch);
4cb75c4b
KH
2134 if (NILP (restriction))
2135 charset = CHAR_CHARSET (XINT (ch));
2136 else
2137 {
2138 Lisp_Object charset_list;
2139
2140 if (CONSP (restriction))
2141 {
2142 for (charset_list = Qnil; CONSP (restriction);
2143 restriction = XCDR (restriction))
2144 {
2145 int id;
2146
2147 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2148 charset_list = Fcons (make_number (id), charset_list);
2149 }
2150 charset_list = Fnreverse (charset_list);
2151 }
2152 else
2153 charset_list = coding_system_charset_list (restriction);
2154 charset = char_charset (XINT (ch), charset_list, NULL);
2155 if (! charset)
2156 return Qnil;
2157 }
3263d5a2 2158 return (CHARSET_NAME (charset));
4ed46869
KH
2159}
2160
17e7ef1b 2161
3263d5a2
KH
2162DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2163 doc: /*
2164Return charset of a character in the current buffer at position POS.
2165If POS is nil, it defauls to the current point.
2166If POS is out of range, the value is nil. */)
2167 (pos)
2168 Lisp_Object pos;
2e344af3 2169{
3263d5a2
KH
2170 Lisp_Object ch;
2171 struct charset *charset;
046b1f03 2172
3263d5a2
KH
2173 ch = Fchar_after (pos);
2174 if (! INTEGERP (ch))
2175 return ch;
2176 charset = CHAR_CHARSET (XINT (ch));
2177 return (CHARSET_NAME (charset));
6ae1f27e 2178}
9036eb45 2179
87b089ad 2180
3263d5a2
KH
2181DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2182 doc: /*
2183Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2184
2185ISO 2022's designation sequence (escape sequence) distinguishes charsets
2186by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2187whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2188See the documentation of the function `charset-info' for the meanings of
2189DIMENSION, CHARS, and FINAL-CHAR. */)
2190 (dimension, chars, final_char)
2191 Lisp_Object dimension, chars, final_char;
6ae1f27e 2192{
3263d5a2 2193 int id;
82215ce9 2194 int chars_flag;
a8a35e61 2195
3263d5a2 2196 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2197 chars_flag = XFASTINT (chars) == 96;
2198 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2199 XFASTINT (final_char));
2200 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2201}
2202
87b089ad 2203
3263d5a2
KH
2204DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2205 0, 0, 0,
2206 doc: /*
ecca2aad
KH
2207Internal use only.
2208Clear temporary charset mapping tables.
2209It should be called only from temacs invoked for dumping. */)
3263d5a2 2210 ()
87b089ad 2211{
ecca2aad 2212 if (temp_charset_work)
87b089ad 2213 {
ecca2aad
KH
2214 free (temp_charset_work);
2215 temp_charset_work = NULL;
2e344af3 2216 }
2e344af3 2217
ecca2aad
KH
2218 if (CHAR_TABLE_P (Vchar_unify_table))
2219 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2220
3263d5a2 2221 return Qnil;
740f080d
KH
2222}
2223
8ddf5e57
DL
2224DEFUN ("charset-priority-list", Fcharset_priority_list,
2225 Scharset_priority_list, 0, 1, 0,
2226 doc: /* Return the list of charsets ordered by priority.
2227HIGHESTP non-nil means just return the highest priority one. */)
2228 (highestp)
2229 Lisp_Object highestp;
2e344af3 2230{
8ddf5e57 2231 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2232
8ddf5e57 2233 if (!NILP (highestp))
16fed1fc 2234 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2235
8ddf5e57 2236 while (!NILP (list))
2e344af3 2237 {
16fed1fc 2238 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2239 list = XCDR (list);
2e344af3 2240 }
8ddf5e57 2241 return Fnreverse (val);
2e344af3
KH
2242}
2243
8ddf5e57
DL
2244DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2245 1, MANY, 0,
2246 doc: /* Assign higher priority to the charsets given as arguments.
2247usage: (set-charset-priority &rest charsets) */)
2248 (nargs, args)
2249 int nargs;
4ed46869
KH
2250 Lisp_Object *args;
2251{
af7c60ca 2252 Lisp_Object new_head, old_list, arglist[2];
321c819c 2253 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2254 int i, id;
4ed46869 2255
8ddf5e57 2256 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2257 new_head = Qnil;
8ddf5e57 2258 for (i = 0; i < nargs; i++)
4ed46869 2259 {
8ddf5e57 2260 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2261 if (! NILP (Fmemq (make_number (id), old_list)))
2262 {
2263 old_list = Fdelq (make_number (id), old_list);
2264 new_head = Fcons (make_number (id), new_head);
2265 }
5729c92f 2266 }
8ddf5e57 2267 arglist[0] = Fnreverse (new_head);
6a9c90ec 2268 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2269 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2270 charset_ordered_list_tick++;
5729c92f 2271
6809ca75 2272 charset_unibyte = -1;
321c819c 2273 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2274 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2275 {
e77415b0 2276 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2277 list_2022 = Fcons (XCAR (old_list), list_2022);
2278 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2279 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2280 if (charset_unibyte < 0)
2281 {
2282 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2283
2284 if (CHARSET_DIMENSION (charset) == 1
2285 && CHARSET_ASCII_COMPATIBLE_P (charset)
2286 && CHARSET_MAX_CHAR (charset) >= 0x80)
2287 charset_unibyte = CHARSET_ID (charset);
2288 }
4ed46869 2289 }
321c819c
KH
2290 Viso_2022_charset_list = Fnreverse (list_2022);
2291 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2292 if (charset_unibyte < 0)
2293 charset_unibyte = charset_iso_8859_1;
4ed46869 2294
8ddf5e57 2295 return Qnil;
4ed46869
KH
2296}
2297
d5b33309
KH
2298DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2299 0, 1, 0,
2300 doc: /* Internal use only.
2301Return charset identification number of CHARSET. */)
2302 (charset)
2303 Lisp_Object charset;
4ed46869 2304{
d5b33309 2305 int id;
4ed46869 2306
d5b33309
KH
2307 CHECK_CHARSET_GET_ID (charset, id);
2308 return make_number (id);
4ed46869
KH
2309}
2310
4ed46869 2311\f
3263d5a2
KH
2312void
2313init_charset ()
4ed46869 2314{
c8f94403
GM
2315 Lisp_Object tempdir;
2316 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
6809ca75 2317 if (access ((char *) SDATA (tempdir), 0) < 0)
c8f94403
GM
2318 {
2319 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2320Emacs will not function correctly without the character map files.\n\
2321Please check your installation!\n",
2322 tempdir);
2323 /* TODO should this be a fatal error? (Bug#909) */
2324 }
2325
2326 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2327}
2328
4ed46869 2329
dfcf069d 2330void
4ed46869
KH
2331init_charset_once ()
2332{
2333 int i, j, k;
2334
3263d5a2
KH
2335 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2336 for (j = 0; j < ISO_MAX_CHARS; j++)
2337 for (k = 0; k < ISO_MAX_FINAL; k++)
2338 iso_charset_table[i][j][k] = -1;
4ed46869 2339
60383934 2340 for (i = 0; i < 256; i++)
3263d5a2 2341 emacs_mule_charset[i] = NULL;
4ed46869 2342
7c7dceee
KH
2343 charset_jisx0201_roman = -1;
2344 charset_jisx0208_1978 = -1;
2345 charset_jisx0208 = -1;
d32320c4 2346 charset_ksc5601 = -1;
4ed46869
KH
2347}
2348
2349#ifdef emacs
2350
dfcf069d 2351void
4ed46869
KH
2352syms_of_charset ()
2353{
3263d5a2
KH
2354 DEFSYM (Qcharsetp, "charsetp");
2355
2356 DEFSYM (Qascii, "ascii");
2357 DEFSYM (Qunicode, "unicode");
6c652beb 2358 DEFSYM (Qemacs, "emacs");
2fe1edd1 2359 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2360 DEFSYM (Qiso_8859_1, "iso-8859-1");
2361
2362 DEFSYM (Qgl, "gl");
2363 DEFSYM (Qgr, "gr");
2364
3263d5a2
KH
2365 staticpro (&Vcharset_ordered_list);
2366 Vcharset_ordered_list = Qnil;
2367
2368 staticpro (&Viso_2022_charset_list);
2369 Viso_2022_charset_list = Qnil;
2370
2371 staticpro (&Vemacs_mule_charset_list);
2372 Vemacs_mule_charset_list = Qnil;
2373
3943ed76
KH
2374 /* Don't staticpro them here. It's done in syms_of_fns. */
2375 QCtest = intern (":test");
2376 Qeq = intern ("eq");
2377
3263d5a2 2378 staticpro (&Vcharset_hash_table);
8f924df7
KH
2379 {
2380 Lisp_Object args[2];
2381 args[0] = QCtest;
2382 args[1] = Qeq;
2383 Vcharset_hash_table = Fmake_hash_table (2, args);
2384 }
3263d5a2
KH
2385
2386 charset_table_size = 128;
2387 charset_table = ((struct charset *)
2388 xmalloc (sizeof (struct charset) * charset_table_size));
2389 charset_table_used = 0;
2390
3263d5a2
KH
2391 defsubr (&Scharsetp);
2392 defsubr (&Smap_charset_chars);
2393 defsubr (&Sdefine_charset_internal);
2394 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2395 defsubr (&Scharset_plist);
2396 defsubr (&Sset_charset_plist);
2397 defsubr (&Sunify_charset);
3fac5a51 2398 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2399 defsubr (&Sdeclare_equiv_charset);
2400 defsubr (&Sfind_charset_region);
2401 defsubr (&Sfind_charset_string);
3263d5a2
KH
2402 defsubr (&Sdecode_char);
2403 defsubr (&Sencode_char);
4ed46869 2404 defsubr (&Ssplit_char);
3263d5a2 2405 defsubr (&Smake_char);
4ed46869 2406 defsubr (&Schar_charset);
90d7b74e 2407 defsubr (&Scharset_after);
4ed46869 2408 defsubr (&Siso_charset);
3263d5a2 2409 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2410 defsubr (&Scharset_priority_list);
2411 defsubr (&Sset_charset_priority);
d5b33309 2412 defsubr (&Scharset_id_internal);
3263d5a2 2413
4beef065 2414 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2415 doc: /* *List of directories to search for charset map files. */);
4beef065 2416 Vcharset_map_path = Qnil;
4ed46869 2417
ecca2aad
KH
2418 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2419 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2420 inhibit_load_charset_map = 0;
2421
4ed46869 2422 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2423 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2424 Vcharset_list = Qnil;
2425
6a9c90ec
KH
2426 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2427 doc: /* ISO639 language mnemonic symbol for the current language environment.
2428If the current language environment is for multiple languages (e.g. "Latin-1"),
2429the value may be a list of mnemonics. */);
2430 Vcurrent_iso639_language = Qnil;
2431
2fe1edd1
KH
2432 charset_ascii
2433 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2434 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2435 charset_iso_8859_1
2436 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2437 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2438 charset_unicode
73fbf2d9 2439 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2440 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2441 charset_emacs
2442 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2443 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2444 charset_eight_bit
2445 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2446 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2447 MAX_5_BYTE_CHAR + 1);
6809ca75 2448 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2449}
2450
2451#endif /* emacs */
cefd8c4f
KH
2452
2453/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2454 (do not change this comment) */