(Fset_charset_priority): Update charset_unibyte.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
76b6f707 3 2008, 2009 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
76b6f707 5 2005, 2006, 2007, 2008, 2009
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
4ed46869 34#include "lisp.h"
3263d5a2 35#include "character.h"
4ed46869
KH
36#include "charset.h"
37#include "coding.h"
fc6b09bf 38#include "disptab.h"
3263d5a2 39#include "buffer.h"
4ed46869 40
04c2f2c5 41/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 42
3263d5a2 43 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 44 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
4ed46869 48
3263d5a2
KH
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
04c2f2c5 52 charset_table as a struct charset.
4ed46869 53
3263d5a2 54*/
4ed46869 55
3263d5a2
KH
56/* List of all charsets. This variable is used only from Emacs
57 Lisp. */
4ed46869 58Lisp_Object Vcharset_list;
4ed46869 59
3263d5a2
KH
60/* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62Lisp_Object Vcharset_hash_table;
4ed46869 63
3263d5a2
KH
64/* Table of struct charset. */
65struct charset *charset_table;
4ed46869 66
3263d5a2 67static int charset_table_size;
5af5dd92 68static int charset_table_used;
4ed46869 69
3263d5a2 70Lisp_Object Qcharsetp;
4ed46869 71
3263d5a2
KH
72/* Special charset symbols. */
73Lisp_Object Qascii;
2fe1edd1 74Lisp_Object Qeight_bit;
3263d5a2
KH
75Lisp_Object Qiso_8859_1;
76Lisp_Object Qunicode;
6c652beb 77Lisp_Object Qemacs;
4ed46869 78
3263d5a2
KH
79/* The corresponding charsets. */
80int charset_ascii;
2fe1edd1 81int charset_eight_bit;
3263d5a2
KH
82int charset_iso_8859_1;
83int charset_unicode;
6c652beb 84int charset_emacs;
b0e3cf2b 85
7c7dceee
KH
86/* The other special charsets. */
87int charset_jisx0201_roman;
88int charset_jisx0208_1978;
89int charset_jisx0208;
d32320c4 90int charset_ksc5601;
c1a08b4c 91
3263d5a2
KH
92/* Value of charset attribute `charset-iso-plane'. */
93Lisp_Object Qgl, Qgr;
c1a08b4c 94
d1a04588
KH
95/* Charset of unibyte characters. */
96int charset_unibyte;
4ed46869 97
3263d5a2
KH
98/* List of charsets ordered by the priority. */
99Lisp_Object Vcharset_ordered_list;
4ed46869 100
6a9c90ec
KH
101/* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 charsets. */
103Lisp_Object Vcharset_non_preferred_head;
104
dbbb237d 105/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 106 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
107 -1. */
108unsigned short charset_ordered_list_tick;
4ed46869 109
3263d5a2
KH
110/* List of iso-2022 charsets. */
111Lisp_Object Viso_2022_charset_list;
35e623fb 112
3263d5a2
KH
113/* List of emacs-mule charsets. */
114Lisp_Object Vemacs_mule_charset_list;
115
116struct charset *emacs_mule_charset[256];
4ed46869
KH
117
118/* Mapping table from ISO2022's charset (specified by DIMENSION,
119 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
120int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
121
4beef065 122Lisp_Object Vcharset_map_path;
3263d5a2 123
ecca2aad
KH
124/* If nonzero, don't load charset maps. */
125int inhibit_load_charset_map;
3263d5a2 126
6a9c90ec
KH
127Lisp_Object Vcurrent_iso639_language;
128
64165ae2
DL
129/* Defined in chartab.c */
130extern void
131map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
132 Lisp_Object function, Lisp_Object table,
133 Lisp_Object arg, struct charset *charset,
134 unsigned from, unsigned to));
135
69f8de5b
KH
136#define CODE_POINT_TO_INDEX(charset, code) \
137 ((charset)->code_linear_p \
138 ? (code) - (charset)->min_code \
139 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
140 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
141 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
142 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
143 ? (((((code) >> 24) - (charset)->code_space[12]) \
144 * (charset)->code_space[11]) \
145 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
146 * (charset)->code_space[7]) \
147 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
148 * (charset)->code_space[3]) \
820ee249
KH
149 + (((code) & 0xFF) - (charset)->code_space[0]) \
150 - ((charset)->char_index_offset)) \
3263d5a2
KH
151 : -1)
152
153
154/* Convert the character index IDX to code-point CODE for CHARSET.
155 It is assumed that IDX is in a valid range. */
156
820ee249
KH
157#define INDEX_TO_CODE_POINT(charset, idx) \
158 ((charset)->code_linear_p \
159 ? (idx) + (charset)->min_code \
160 : (idx += (charset)->char_index_offset, \
161 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
162 | (((charset)->code_space[4] \
163 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
164 << 8) \
165 | (((charset)->code_space[8] \
166 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
167 << 16) \
168 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
169 << 24))))
4cf9710d 170
ecca2aad
KH
171/* Structure to hold mapping tables for a charset. Used by temacs
172 invoked for dumping. */
8a73a704 173
ecca2aad
KH
174static struct
175{
176 /* The current charset for which the following tables are setup. */
177 struct charset *current;
178
179 /* 1 iff the following table is used for encoder. */
180 short for_encoder;
181
182 /* When the following table is used for encoding, mininum and
183 maxinum character of the current charset. */
184 int min_char, max_char;
185
186 /* A Unicode character correspoinding to the code indice 0 (i.e. the
187 minimum code-point) of the current charset, or -1 if the code
188 indice 0 is not a Unicode character. This is checked when
189 table.encoder[CHAR] is zero. */
190 int zero_index_char;
191
192 union {
193 /* Table mapping code-indices (not code-points) of the current
194 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
195 doesn't belong to the current charset. */
196 int decoder[0x10000];
197 /* Table mapping Unicode characters to code-indices of the current
198 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
199 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
200 (0x20000..0x2FFFF). Note that there is no charset map that
201 uses both SMP and SIP. */
202 unsigned short encoder[0x20000];
203 } table;
204} *temp_charset_work;
205
206#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
207 do { \
208 if ((CODE) == 0) \
209 temp_charset_work->zero_index_char = (C); \
210 else if ((C) < 0x20000) \
211 temp_charset_work->table.encoder[(C)] = (CODE); \
212 else \
213 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
214 } while (0)
215
216#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
217 ((C) == temp_charset_work->zero_index_char ? 0 \
218 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
219 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
220 : temp_charset_work->table.encoder[(C) - 0x10000] \
221 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
222
223#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
224 (temp_charset_work->table.decoder[(CODE)] = (C))
225
226#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
227 (temp_charset_work->table.decoder[(CODE)])
046b1f03 228\f
93bcb785 229
e9ce014c
KH
230/* Set to 1 to warn that a charset map is loaded and thus a buffer
231 text and a string data may be relocated. */
3263d5a2 232int charset_map_loaded;
4ed46869 233
e9ce014c 234struct charset_map_entries
4ed46869 235{
e9ce014c
KH
236 struct {
237 unsigned from, to;
238 int c;
239 } entry[0x10000];
240 struct charset_map_entries *next;
241};
242
ecca2aad
KH
243/* Load the mapping information of CHARSET from ENTRIES for
244 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
245 encoding (CONTROL_FLAG == 2).
246
247 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
248 and CHARSET->fast_map.
249
250 If CONTROL_FLAG is 1, setup the following tables according to
251 CHARSET->method and inhibit_load_charset_map.
4cf9710d 252
ecca2aad
KH
253 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
254 ----------------------+--------------------+---------------------------
255 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 258
ecca2aad 259 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 260
ecca2aad
KH
261 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
262 ----------------------+--------------------+---------------------------
263 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
264 ----------------------+--------------------+--------------------------
265 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
266*/
4ed46869 267
3263d5a2 268static void
e9ce014c 269load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 270 struct charset *charset;
e9ce014c
KH
271 struct charset_map_entries *entries;
272 int n_entries;
3263d5a2 273 int control_flag;
4ed46869 274{
3263d5a2 275 Lisp_Object vec, table;
3263d5a2
KH
276 unsigned max_code = CHARSET_MAX_CODE (charset);
277 int ascii_compatible_p = charset->ascii_compatible_p;
278 int min_char, max_char, nonascii_min_char;
3263d5a2 279 int i;
3263d5a2 280 unsigned char *fast_map = charset->fast_map;
99529c2c 281
e9ce014c
KH
282 if (n_entries <= 0)
283 return;
284
ecca2aad 285 if (control_flag)
8ac5a9cc 286 {
ecca2aad
KH
287 if (! inhibit_load_charset_map)
288 {
289 if (control_flag == 1)
290 {
291 if (charset->method == CHARSET_METHOD_MAP)
292 {
293 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 294
ecca2aad
KH
295 vec = CHARSET_DECODER (charset)
296 = Fmake_vector (make_number (n), make_number (-1));
297 }
298 else
299 {
300 char_table_set_range (Vchar_unify_table,
301 charset->min_char, charset->max_char,
302 Qnil);
303 }
304 }
305 else
306 {
307 table = Fmake_char_table (Qnil, Qnil);
308 if (charset->method == CHARSET_METHOD_MAP)
309 CHARSET_ENCODER (charset) = table;
310 else
311 CHARSET_DEUNIFIER (charset) = table;
312 }
313 }
314 else
315 {
316 if (! temp_charset_work)
317 temp_charset_work = malloc (sizeof (*temp_charset_work));
318 if (control_flag == 1)
319 {
320 memset (temp_charset_work->table.decoder, -1,
321 sizeof (int) * 0x10000);
ecca2aad
KH
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
3263d5a2 333 charset_map_loaded = 1;
2e344af3 334 }
6662e69b 335
e9ce014c 336 min_char = max_char = entries->entry[0].c;
3263d5a2 337 nonascii_min_char = MAX_CHAR;
e9ce014c 338 for (i = 0; i < n_entries; i++)
2e344af3 339 {
e9ce014c 340 unsigned from, to;
3b4f4446
KH
341 int from_index, to_index;
342 int from_c, to_c;
e9ce014c 343 int idx = i % 0x10000;
3263d5a2 344
e9ce014c
KH
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
3b4f4446
KH
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
6662e69b 352 {
3b4f4446
KH
353 to_index = from_index;
354 to_c = from_c;
6662e69b 355 }
3b4f4446 356 else
6662e69b 357 {
3b4f4446
KH
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
6662e69b 360 }
3b4f4446
KH
361 if (from_index < 0 || to_index < 0)
362 continue;
3263d5a2 363
ecca2aad
KH
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
6662e69b 368
ecca2aad
KH
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
3b4f4446
KH
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
177c0ea7 418
ecca2aad
KH
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 421 }
8ac5a9cc 422 }
3263d5a2 423
ecca2aad 424 if (control_flag == 0)
4ed46869 425 {
3263d5a2
KH
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 429 }
ecca2aad
KH
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
4ed46869
KH
435}
436
12bcae05 437
3263d5a2
KH
438/* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
12bcae05 440
3263d5a2
KH
441static INLINE unsigned
442read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
12bcae05 445{
3263d5a2
KH
446 int c;
447 unsigned n;
12bcae05 448
3263d5a2
KH
449 while ((c = getc (fp)) != EOF)
450 {
69f8de5b 451 if (c == '#')
3263d5a2
KH
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
8f924df7 460 }
3263d5a2
KH
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
e9ce014c
KH
475 if (c != EOF)
476 ungetc (c, fp);
3263d5a2
KH
477 return n;
478}
12bcae05 479
dde2559c 480extern Lisp_Object Qfile_name_handler_alist;
537efd8d 481
3263d5a2 482/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
483 Each line of MAPFILE has this form
484 0xAAAA 0xCCCC
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
487 0xAAAA-0xBBBB 0xCCCC
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
4ed46869 490
3263d5a2
KH
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 493 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
494 range.
495
18a10a21
JB
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 498
c449997d 499extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
4ed46869 500
e9ce014c
KH
501static void
502load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
503 struct charset *charset;
504 Lisp_Object mapfile;
e9ce014c 505 int control_flag;
4ed46869 506{
e9ce014c
KH
507 unsigned min_code = CHARSET_MIN_CODE (charset);
508 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
509 int fd;
510 FILE *fp;
3263d5a2
KH
511 int eof;
512 Lisp_Object suffixes;
e9ce014c
KH
513 struct charset_map_entries *head, *entries;
514 int n_entries;
dde2559c 515 int count = SPECPDL_INDEX ();
4ed46869 516
3263d5a2
KH
517 suffixes = Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil));
4ed46869 519
dde2559c 520 specbind (Qfile_name_handler_alist, Qnil);
4beef065 521 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 522 unbind_to (count, Qnil);
3263d5a2
KH
523 if (fd < 0
524 || ! (fp = fdopen (fd, "r")))
8458d4c1 525 error ("Failure in loading charset map: %S", SDATA (mapfile));
4ed46869 526
e9ce014c
KH
527 head = entries = ((struct charset_map_entries *)
528 alloca (sizeof (struct charset_map_entries)));
529 n_entries = 0;
3263d5a2
KH
530 eof = 0;
531 while (1)
532 {
e9ce014c
KH
533 unsigned from, to;
534 int c;
535 int idx;
4ed46869 536
e9ce014c 537 from = read_hex (fp, &eof);
3263d5a2
KH
538 if (eof)
539 break;
e9ce014c
KH
540 if (getc (fp) == '-')
541 to = read_hex (fp, &eof);
542 else
543 to = from;
544 c = (int) read_hex (fp, &eof);
ac4137cc 545
e9ce014c
KH
546 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
547 continue;
ac4137cc 548
e9ce014c 549 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 550 {
e9ce014c
KH
551 entries->next = ((struct charset_map_entries *)
552 alloca (sizeof (struct charset_map_entries)));
553 entries = entries->next;
3263d5a2 554 }
e9ce014c
KH
555 idx = n_entries % 0x10000;
556 entries->entry[idx].from = from;
557 entries->entry[idx].to = to;
558 entries->entry[idx].c = c;
559 n_entries++;
3263d5a2
KH
560 }
561 fclose (fp);
562 close (fd);
177c0ea7 563
e9ce014c 564 load_charset_map (charset, head, n_entries, control_flag);
4ed46869
KH
565}
566
e9ce014c
KH
567static void
568load_charset_map_from_vector (charset, vec, control_flag)
569 struct charset *charset;
570 Lisp_Object vec;
571 int control_flag;
23d2a7f1 572{
e9ce014c
KH
573 unsigned min_code = CHARSET_MIN_CODE (charset);
574 unsigned max_code = CHARSET_MAX_CODE (charset);
575 struct charset_map_entries *head, *entries;
576 int n_entries;
577 int len = ASIZE (vec);
578 int i;
23d2a7f1 579
e9ce014c 580 if (len % 2 == 1)
3263d5a2 581 {
e9ce014c
KH
582 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
583 return;
3263d5a2 584 }
35e623fb 585
e9ce014c
KH
586 head = entries = ((struct charset_map_entries *)
587 alloca (sizeof (struct charset_map_entries)));
588 n_entries = 0;
589 for (i = 0; i < len; i += 2)
35e623fb 590 {
e9ce014c
KH
591 Lisp_Object val, val2;
592 unsigned from, to;
593 int c;
594 int idx;
d2665018 595
e9ce014c
KH
596 val = AREF (vec, i);
597 if (CONSP (val))
bbf12bb3 598 {
e9ce014c
KH
599 val2 = XCDR (val);
600 val = XCAR (val);
601 CHECK_NATNUM (val);
602 CHECK_NATNUM (val2);
603 from = XFASTINT (val);
604 to = XFASTINT (val2);
bbf12bb3 605 }
e9ce014c 606 else
bbf12bb3 607 {
e9ce014c
KH
608 CHECK_NATNUM (val);
609 from = to = XFASTINT (val);
bbf12bb3 610 }
e9ce014c
KH
611 val = AREF (vec, i + 1);
612 CHECK_NATNUM (val);
613 c = XFASTINT (val);
76d7b829 614
e9ce014c
KH
615 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
616 continue;
76d7b829 617
dcc694d7 618 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c
KH
619 {
620 entries->next = ((struct charset_map_entries *)
621 alloca (sizeof (struct charset_map_entries)));
622 entries = entries->next;
623 }
624 idx = n_entries % 0x10000;
625 entries->entry[idx].from = from;
626 entries->entry[idx].to = to;
627 entries->entry[idx].c = c;
628 n_entries++;
629 }
76d7b829 630
e9ce014c 631 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
632}
633
ecca2aad
KH
634
635/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
637
3263d5a2 638static void
ecca2aad 639load_charset (charset, control_flag)
3263d5a2 640 struct charset *charset;
ecca2aad 641 int control_flag;
76d7b829 642{
ecca2aad 643 Lisp_Object map;
76d7b829 644
ecca2aad
KH
645 if (inhibit_load_charset_map
646 && temp_charset_work
647 && charset == temp_charset_work->current
56f00ed2 648 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
649 return;
650
651 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
652 map = CHARSET_MAP (charset);
653 else if (CHARSET_UNIFIED_P (charset))
654 map = CHARSET_UNIFY_MAP (charset);
655 if (STRINGP (map))
656 load_charset_map_from_file (charset, map, control_flag);
657 else
658 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 659}
76d7b829 660
3263d5a2
KH
661
662DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
663 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
664 (object)
665 Lisp_Object object;
23d2a7f1 666{
3263d5a2 667 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
668}
669
4ed46869 670
ecca2aad
KH
671void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
672 Lisp_Object function, Lisp_Object arg,
673 unsigned from, unsigned to));
674
675void
676map_charset_for_dump (c_function, function, arg, from, to)
677 void (*c_function) (Lisp_Object, Lisp_Object);
678 Lisp_Object function, arg;
679 unsigned from, to;
680{
681 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
682 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
683 Lisp_Object range;
684 int c, stop;
685 struct gcpro gcpro1;
686
687 range = Fcons (Qnil, Qnil);
688 GCPRO1 (range);
689
690 c = temp_charset_work->min_char;
691 stop = (temp_charset_work->max_char < 0x20000
692 ? temp_charset_work->max_char : 0xFFFF);
693
694 while (1)
695 {
696 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
697
698 if (index >= from_idx && index <= to_idx)
699 {
700 if (NILP (XCAR (range)))
701 XSETCAR (range, make_number (c));
702 }
703 else if (! NILP (XCAR (range)))
704 {
705 XSETCDR (range, make_number (c - 1));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 XSETCAR (range, Qnil);
711 }
712 if (c == stop)
713 {
714 if (c == temp_charset_work->max_char)
715 {
716 if (! NILP (XCAR (range)))
717 {
718 XSETCDR (range, make_number (c));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
723 }
724 break;
725 }
726 c = 0x1FFFF;
727 stop = temp_charset_work->max_char;
728 }
729 c++;
730 }
c542407d 731 UNGCPRO;
ecca2aad
KH
732}
733
4ed46869 734void
374c5cfd
KH
735map_charset_chars (c_function, function, arg,
736 charset, from, to)
737 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
738 Lisp_Object function, arg;
739 struct charset *charset;
740 unsigned from, to;
4ed46869 741{
3263d5a2 742 Lisp_Object range;
374c5cfd 743 int partial;
3263d5a2 744
374c5cfd
KH
745 partial = (from > CHARSET_MIN_CODE (charset)
746 || to < CHARSET_MAX_CODE (charset));
747
3263d5a2 748 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 749 {
374c5cfd
KH
750 int from_idx = CODE_POINT_TO_INDEX (charset, from);
751 int to_idx = CODE_POINT_TO_INDEX (charset, to);
752 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
753 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
754
ecca2aad
KH
755 if (CHARSET_UNIFIED_P (charset))
756 {
757 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
758 load_charset (charset, 2);
759 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
760 map_char_table_for_charset (c_function, function,
761 CHARSET_DEUNIFIER (charset), arg,
762 partial ? charset : NULL, from, to);
763 else
764 map_charset_for_dump (c_function, function, arg, from, to);
765 }
766
374c5cfd 767 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 768 if (NILP (function))
5af5dd92 769 (*c_function) (arg, range);
3263d5a2
KH
770 else
771 call2 (function, range, arg);
c83ef371 772 }
3263d5a2
KH
773 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
774 {
775 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
776 load_charset (charset, 2);
777 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
778 map_char_table_for_charset (c_function, function,
779 CHARSET_ENCODER (charset), arg,
780 partial ? charset : NULL, from, to);
781 else
782 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 783 }
374c5cfd 784 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 785 {
374c5cfd
KH
786 Lisp_Object subset_info;
787 int offset;
788
789 subset_info = CHARSET_SUBSET (charset);
790 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
791 offset = XINT (AREF (subset_info, 3));
792 from -= offset;
793 if (from < XFASTINT (AREF (subset_info, 1)))
794 from = XFASTINT (AREF (subset_info, 1));
795 to -= offset;
796 if (to > XFASTINT (AREF (subset_info, 2)))
797 to = XFASTINT (AREF (subset_info, 2));
798 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 799 }
374c5cfd
KH
800 else /* i.e. CHARSET_METHOD_SUPERSET */
801 {
802 Lisp_Object parents;
4ed46869 803
374c5cfd
KH
804 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
805 parents = XCDR (parents))
bbf12bb3 806 {
374c5cfd
KH
807 int offset;
808 unsigned this_from, this_to;
809
810 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
811 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
812 this_from = from > offset ? from - offset : 0;
813 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
814 if (this_from < CHARSET_MIN_CODE (charset))
815 this_from = CHARSET_MIN_CODE (charset);
816 if (this_to > CHARSET_MAX_CODE (charset))
817 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
818 map_charset_chars (c_function, function, arg, charset,
819 this_from, this_to);
bbf12bb3 820 }
35e623fb 821 }
4ed46869
KH
822}
823
374c5cfd 824DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 825 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 826FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 827argument ARG.
4ed46869 828
374c5cfd
KH
829RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
830characters contained in CHARSET.
4ed46869 831
374c5cfd 832The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 833range of code points (in CHARSET) of target characters. */)
374c5cfd
KH
834 (function, charset, arg, from_code, to_code)
835 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 836{
374c5cfd 837 struct charset *cs;
16fed1fc 838 unsigned from, to;
4ed46869 839
374c5cfd
KH
840 CHECK_CHARSET_GET_CHARSET (charset, cs);
841 if (NILP (from_code))
16fed1fc 842 from = CHARSET_MIN_CODE (cs);
970b7474 843 else
4ed46869 844 {
970b7474
KH
845 CHECK_NATNUM (from_code);
846 from = XINT (from_code);
847 if (from < CHARSET_MIN_CODE (cs))
848 from = CHARSET_MIN_CODE (cs);
4ed46869 849 }
374c5cfd 850 if (NILP (to_code))
970b7474 851 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
852 else
853 {
970b7474
KH
854 CHECK_NATNUM (to_code);
855 to = XINT (to_code);
856 if (to > CHARSET_MAX_CODE (cs))
857 to = CHARSET_MAX_CODE (cs);
4ed46869 858 }
16fed1fc 859 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 860 return Qnil;
35e623fb 861}
4ed46869 862
4ed46869 863
3263d5a2
KH
864/* Define a charset according to the arguments. The Nth argument is
865 the Nth attribute of the charset (the last attribute `charset-id'
866 is not included). See the docstring of `define-charset' for the
867 detail. */
4ed46869 868
3263d5a2
KH
869DEFUN ("define-charset-internal", Fdefine_charset_internal,
870 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
871 doc: /* For internal use only.
872usage: (define-charset-internal ...) */)
3263d5a2
KH
873 (nargs, args)
874 int nargs;
875 Lisp_Object *args;
4ed46869 876{
3263d5a2
KH
877 /* Charset attr vector. */
878 Lisp_Object attrs;
879 Lisp_Object val;
880 unsigned hash_code;
881 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 882 int i, j;
3263d5a2
KH
883 struct charset charset;
884 int id;
885 int dimension;
886 int new_definition_p;
887 int nchars;
888
889 if (nargs != charset_arg_max)
890 return Fsignal (Qwrong_number_of_arguments,
891 Fcons (intern ("define-charset-internal"),
892 make_number (nargs)));
893
894 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
895
896 CHECK_SYMBOL (args[charset_arg_name]);
897 ASET (attrs, charset_name, args[charset_arg_name]);
898
899 val = args[charset_arg_code_space];
900 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 901 {
3263d5a2
KH
902 int min_byte, max_byte;
903
904 min_byte = XINT (Faref (val, make_number (i * 2)));
905 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
906 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
907 error ("Invalid :code-space value");
908 charset.code_space[i * 4] = min_byte;
909 charset.code_space[i * 4 + 1] = max_byte;
910 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
911 nchars *= charset.code_space[i * 4 + 2];
912 charset.code_space[i * 4 + 3] = nchars;
913 if (max_byte > 0)
914 dimension = i + 1;
915 }
4ed46869 916
3263d5a2
KH
917 val = args[charset_arg_dimension];
918 if (NILP (val))
919 charset.dimension = dimension;
920 else
4ed46869 921 {
3263d5a2
KH
922 CHECK_NATNUM (val);
923 charset.dimension = XINT (val);
924 if (charset.dimension < 1 || charset.dimension > 4)
925 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
926 }
927
3263d5a2
KH
928 charset.code_linear_p
929 = (charset.dimension == 1
930 || (charset.code_space[2] == 256
931 && (charset.dimension == 2
932 || (charset.code_space[6] == 256
933 && (charset.dimension == 3
934 || charset.code_space[10] == 256)))));
935
69f8de5b 936 if (! charset.code_linear_p)
4ed46869 937 {
69f8de5b 938 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 939 bzero (charset.code_space_mask, 256);
69f8de5b
KH
940 for (i = 0; i < 4; i++)
941 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
942 j++)
943 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
944 }
945
3263d5a2 946 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 947
3263d5a2
KH
948 charset.min_code = (charset.code_space[0]
949 | (charset.code_space[4] << 8)
950 | (charset.code_space[8] << 16)
951 | (charset.code_space[12] << 24));
952 charset.max_code = (charset.code_space[1]
953 | (charset.code_space[5] << 8)
954 | (charset.code_space[9] << 16)
955 | (charset.code_space[13] << 24));
820ee249 956 charset.char_index_offset = 0;
8a73a704 957
820ee249
KH
958 val = args[charset_arg_min_code];
959 if (! NILP (val))
960 {
961 unsigned code;
fdb82f93 962
820ee249
KH
963 if (INTEGERP (val))
964 code = XINT (val);
965 else
966 {
967 CHECK_CONS (val);
8f924df7
KH
968 CHECK_NUMBER_CAR (val);
969 CHECK_NUMBER_CDR (val);
820ee249
KH
970 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
971 }
972 if (code < charset.min_code
973 || code > charset.max_code)
974 args_out_of_range_3 (make_number (charset.min_code),
975 make_number (charset.max_code), val);
976 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
977 charset.min_code = code;
978 }
3fac5a51 979
820ee249
KH
980 val = args[charset_arg_max_code];
981 if (! NILP (val))
3fac5a51 982 {
820ee249
KH
983 unsigned code;
984
985 if (INTEGERP (val))
986 code = XINT (val);
987 else
988 {
989 CHECK_CONS (val);
8f924df7
KH
990 CHECK_NUMBER_CAR (val);
991 CHECK_NUMBER_CDR (val);
820ee249
KH
992 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
993 }
994 if (code < charset.min_code
995 || code > charset.max_code)
996 args_out_of_range_3 (make_number (charset.min_code),
997 make_number (charset.max_code), val);
998 charset.max_code = code;
3fac5a51 999 }
3fac5a51 1000
ecca2aad 1001 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 1002
3263d5a2
KH
1003 val = args[charset_arg_invalid_code];
1004 if (NILP (val))
1005 {
1006 if (charset.min_code > 0)
1007 charset.invalid_code = 0;
bbf12bb3
KH
1008 else
1009 {
3263d5a2
KH
1010 XSETINT (val, charset.max_code + 1);
1011 if (XINT (val) == charset.max_code + 1)
1012 charset.invalid_code = charset.max_code + 1;
1013 else
1014 error ("Attribute :invalid-code must be specified");
76d7b829 1015 }
76d7b829 1016 }
3263d5a2
KH
1017 else
1018 {
1019 CHECK_NATNUM (val);
1020 charset.invalid_code = XFASTINT (val);
1021 }
4ed46869 1022
3263d5a2
KH
1023 val = args[charset_arg_iso_final];
1024 if (NILP (val))
1025 charset.iso_final = -1;
1026 else
1027 {
1028 CHECK_NUMBER (val);
1029 if (XINT (val) < '0' || XINT (val) > 127)
1030 error ("Invalid iso-final-char: %d", XINT (val));
1031 charset.iso_final = XINT (val);
1032 }
4ed46869 1033
3263d5a2
KH
1034 val = args[charset_arg_iso_revision];
1035 if (NILP (val))
1036 charset.iso_revision = -1;
1037 else
4ed46869 1038 {
3263d5a2
KH
1039 CHECK_NUMBER (val);
1040 if (XINT (val) > 63)
1041 args_out_of_range (make_number (63), val);
1042 charset.iso_revision = XINT (val);
4ed46869 1043 }
4ed46869 1044
3263d5a2
KH
1045 val = args[charset_arg_emacs_mule_id];
1046 if (NILP (val))
1047 charset.emacs_mule_id = -1;
4ed46869
KH
1048 else
1049 {
3263d5a2
KH
1050 CHECK_NATNUM (val);
1051 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1052 error ("Invalid emacs-mule-id: %d", XINT (val));
1053 charset.emacs_mule_id = XINT (val);
c83ef371 1054 }
f6302ac9 1055
3263d5a2 1056 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1057
3263d5a2 1058 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1059
3263d5a2
KH
1060 charset.unified_p = 0;
1061
1062 bzero (charset.fast_map, sizeof (charset.fast_map));
1063
1064 if (! NILP (args[charset_arg_code_offset]))
1065 {
1066 val = args[charset_arg_code_offset];
1067 CHECK_NUMBER (val);
1068
1069 charset.method = CHARSET_METHOD_OFFSET;
1070 charset.code_offset = XINT (val);
1071
1072 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1073 charset.min_char = i + charset.code_offset;
1074 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1075 charset.max_char = i + charset.code_offset;
1076 if (charset.max_char > MAX_CHAR)
1077 error ("Unsupported max char: %d", charset.max_char);
1078
f148205f
KH
1079 i = (charset.min_char >> 7) << 7;
1080 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1081 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1082 i = (i >> 12) << 12;
3263d5a2
KH
1083 for (; i <= charset.max_char; i += 0x1000)
1084 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1085 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1086 charset.ascii_compatible_p = 1;
3263d5a2
KH
1087 }
1088 else if (! NILP (args[charset_arg_map]))
1089 {
1090 val = args[charset_arg_map];
1091 ASET (attrs, charset_map, val);
ecca2aad 1092 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1093 }
374c5cfd 1094 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1095 {
374c5cfd
KH
1096 Lisp_Object parent;
1097 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1098 struct charset *parent_charset;
1099
1100 val = args[charset_arg_subset];
1101 parent = Fcar (val);
1102 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1103 parent_min_code = Fnth (make_number (1), val);
1104 CHECK_NATNUM (parent_min_code);
1105 parent_max_code = Fnth (make_number (2), val);
1106 CHECK_NATNUM (parent_max_code);
1107 parent_code_offset = Fnth (make_number (3), val);
1108 CHECK_NUMBER (parent_code_offset);
1109 val = Fmake_vector (make_number (4), Qnil);
1110 ASET (val, 0, make_number (parent_charset->id));
1111 ASET (val, 1, parent_min_code);
1112 ASET (val, 2, parent_max_code);
1113 ASET (val, 3, parent_code_offset);
1114 ASET (attrs, charset_subset, val);
1115
1116 charset.method = CHARSET_METHOD_SUBSET;
1117 /* Here, we just copy the parent's fast_map. It's not accurate,
1118 but at least it works for quickly detecting which character
1119 DOESN'T belong to this charset. */
1120 for (i = 0; i < 190; i++)
1121 charset.fast_map[i] = parent_charset->fast_map[i];
1122
1123 /* We also copy these for parents. */
1124 charset.min_char = parent_charset->min_char;
1125 charset.max_char = parent_charset->max_char;
1126 }
1127 else if (! NILP (args[charset_arg_superset]))
0282eb69 1128 {
374c5cfd
KH
1129 val = args[charset_arg_superset];
1130 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1131 val = Fcopy_sequence (val);
374c5cfd 1132 ASET (attrs, charset_superset, val);
3263d5a2
KH
1133
1134 charset.min_char = MAX_CHAR;
1135 charset.max_char = 0;
1136 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1137 {
3263d5a2
KH
1138 Lisp_Object elt, car_part, cdr_part;
1139 int this_id, offset;
1140 struct charset *this_charset;
2e344af3 1141
3263d5a2
KH
1142 elt = Fcar (val);
1143 if (CONSP (elt))
2e344af3 1144 {
3263d5a2
KH
1145 car_part = XCAR (elt);
1146 cdr_part = XCDR (elt);
1147 CHECK_CHARSET_GET_ID (car_part, this_id);
1148 CHECK_NUMBER (cdr_part);
1149 offset = XINT (cdr_part);
177c0ea7 1150 }
3263d5a2 1151 else
4ed46869 1152 {
3263d5a2
KH
1153 CHECK_CHARSET_GET_ID (elt, this_id);
1154 offset = 0;
4ed46869 1155 }
3263d5a2
KH
1156 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1157
1158 this_charset = CHARSET_FROM_ID (this_id);
1159 if (charset.min_char > this_charset->min_char)
1160 charset.min_char = this_charset->min_char;
1161 if (charset.max_char < this_charset->max_char)
1162 charset.max_char = this_charset->max_char;
1163 for (i = 0; i < 190; i++)
1164 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1165 }
0282eb69 1166 }
2e344af3 1167 else
3263d5a2 1168 error ("None of :code-offset, :map, :parents are specified");
05505664 1169
3263d5a2
KH
1170 val = args[charset_arg_unify_map];
1171 if (! NILP (val) && !STRINGP (val))
1172 CHECK_VECTOR (val);
1173 ASET (attrs, charset_unify_map, val);
05505664 1174
3263d5a2
KH
1175 CHECK_LIST (args[charset_arg_plist]);
1176 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1177
3263d5a2
KH
1178 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1179 &hash_code);
1180 if (charset.hash_index >= 0)
1181 {
1182 new_definition_p = 0;
4f65af01 1183 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1184 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1185 }
1a45ff10 1186 else
3263d5a2
KH
1187 {
1188 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1189 hash_code);
1190 if (charset_table_used == charset_table_size)
1191 {
2fe1edd1
KH
1192 struct charset *new_table
1193 = (struct charset *) xmalloc (sizeof (struct charset)
1194 * (charset_table_size + 16));
1195 bcopy (charset_table, new_table,
1196 sizeof (struct charset) * charset_table_size);
1197 charset_table_size += 16;
1198 charset_table = new_table;
3263d5a2
KH
1199 }
1200 id = charset_table_used++;
3263d5a2
KH
1201 new_definition_p = 1;
1202 }
2e344af3 1203
4f65af01 1204 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1205 charset.id = id;
1206 charset_table[id] = charset;
2e344af3 1207
ecca2aad 1208 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1209 {
1210 load_charset (&charset, 0);
1211 charset_table[id] = charset;
1212 }
ecca2aad 1213
3263d5a2 1214 if (charset.iso_final >= 0)
4ed46869 1215 {
3263d5a2
KH
1216 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1217 charset.iso_final) = id;
1218 if (new_definition_p)
1219 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1220 Fcons (make_number (id), Qnil));
7c7dceee
KH
1221 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1222 charset_jisx0201_roman = id;
1223 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1224 charset_jisx0208_1978 = id;
1225 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1226 charset_jisx0208 = id;
d32320c4
KH
1227 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1228 charset_ksc5601 = id;
4ed46869 1229 }
d0cf2d48 1230
3263d5a2 1231 if (charset.emacs_mule_id >= 0)
4ed46869 1232 {
3263d5a2 1233 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1234 if (charset.emacs_mule_id < 0xA0)
1235 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1236 else
1237 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1238 if (new_definition_p)
1239 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1240 Fcons (make_number (id), Qnil));
4ed46869
KH
1241 }
1242
3263d5a2
KH
1243 if (new_definition_p)
1244 {
1245 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1246 if (charset.supplementary_p)
1247 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1248 Fcons (make_number (id), Qnil));
1249 else
880820fe
KH
1250 {
1251 Lisp_Object tail;
1252
1253 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1254 {
1255 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1256
1257 if (cs->supplementary_p)
1258 break;
1259 }
1260 if (EQ (tail, Vcharset_ordered_list))
1261 Vcharset_ordered_list = Fcons (make_number (id),
1262 Vcharset_ordered_list);
1263 else if (NILP (tail))
1264 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1265 Fcons (make_number (id), Qnil));
1266 else
1267 {
1268 val = Fcons (XCAR (tail), XCDR (tail));
1269 XSETCDR (tail, val);
1270 XSETCAR (tail, make_number (id));
1271 }
1272 }
dbbb237d 1273 charset_ordered_list_tick++;
3263d5a2 1274 }
4ed46869 1275
3263d5a2 1276 return Qnil;
4ed46869
KH
1277}
1278
2fe1edd1
KH
1279
1280/* Same as Fdefine_charset_internal but arguments are more convenient
1281 to call from C (typically in syms_of_charset). This can define a
1282 charset of `offset' method only. Return the ID of the new
1283 charset. */
1284
1285static int
1286define_charset_internal (name, dimension, code_space, min_code, max_code,
1287 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1288 ascii_compatible, supplementary,
2fe1edd1
KH
1289 code_offset)
1290 Lisp_Object name;
1291 int dimension;
1292 unsigned char *code_space;
1293 unsigned min_code, max_code;
1294 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1295 int ascii_compatible, supplementary;
2fe1edd1
KH
1296 int code_offset;
1297{
1298 Lisp_Object args[charset_arg_max];
1299 Lisp_Object plist[14];
1300 Lisp_Object val;
1301 int i;
1302
1303 args[charset_arg_name] = name;
1304 args[charset_arg_dimension] = make_number (dimension);
1305 val = Fmake_vector (make_number (8), make_number (0));
1306 for (i = 0; i < 8; i++)
1307 ASET (val, i, make_number (code_space[i]));
1308 args[charset_arg_code_space] = val;
1309 args[charset_arg_min_code] = make_number (min_code);
1310 args[charset_arg_max_code] = make_number (max_code);
1311 args[charset_arg_iso_final]
1312 = (iso_final < 0 ? Qnil : make_number (iso_final));
1313 args[charset_arg_iso_revision] = make_number (iso_revision);
1314 args[charset_arg_emacs_mule_id]
1315 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1316 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1317 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1318 args[charset_arg_invalid_code] = Qnil;
1319 args[charset_arg_code_offset] = make_number (code_offset);
1320 args[charset_arg_map] = Qnil;
1321 args[charset_arg_subset] = Qnil;
1322 args[charset_arg_superset] = Qnil;
1323 args[charset_arg_unify_map] = Qnil;
1324
1325 plist[0] = intern (":name");
1326 plist[1] = args[charset_arg_name];
1327 plist[2] = intern (":dimension");
1328 plist[3] = args[charset_arg_dimension];
1329 plist[4] = intern (":code-space");
1330 plist[5] = args[charset_arg_code_space];
1331 plist[6] = intern (":iso-final-char");
1332 plist[7] = args[charset_arg_iso_final];
1333 plist[8] = intern (":emacs-mule-id");
1334 plist[9] = args[charset_arg_emacs_mule_id];
1335 plist[10] = intern (":ascii-compatible-p");
1336 plist[11] = args[charset_arg_ascii_compatible_p];
1337 plist[12] = intern (":code-offset");
1338 plist[13] = args[charset_arg_code_offset];
1339
1340 args[charset_arg_plist] = Flist (14, plist);
1341 Fdefine_charset_internal (charset_arg_max, args);
1342
1343 return XINT (CHARSET_SYMBOL_ID (name));
1344}
1345
1346
3263d5a2
KH
1347DEFUN ("define-charset-alias", Fdefine_charset_alias,
1348 Sdefine_charset_alias, 2, 2, 0,
1349 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1350 (alias, charset)
1351 Lisp_Object alias, charset;
4ed46869 1352{
3263d5a2
KH
1353 Lisp_Object attr;
1354
1355 CHECK_CHARSET_GET_ATTR (charset, attr);
1356 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1357 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1358 return Qnil;
1359}
4ed46869 1360
4ed46869 1361
3263d5a2 1362DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1363 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1364 (charset)
1365 Lisp_Object charset;
1366{
1367 Lisp_Object attrs;
1368
1369 CHECK_CHARSET_GET_ATTR (charset, attrs);
1370 return CHARSET_ATTR_PLIST (attrs);
1371}
1372
1373
1374DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1375 doc: /* Set CHARSET's property list to PLIST. */)
1376 (charset, plist)
1377 Lisp_Object charset, plist;
1378{
1379 Lisp_Object attrs;
1380
1381 CHECK_CHARSET_GET_ATTR (charset, attrs);
1382 CHARSET_ATTR_PLIST (attrs) = plist;
1383 return plist;
1384}
1385
1386
dbbb237d 1387DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1388 doc: /* Unify characters of CHARSET with Unicode.
1389This means reading the relevant file and installing the table defined
dbbb237d
KH
1390by CHARSET's `:unify-map' property.
1391
64165ae2
DL
1392Optional second arg UNIFY-MAP is a file name string or a vector. It has
1393the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1394`define-charset' (which see).
1395
1396Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1397 (charset, unify_map, deunify)
1398 Lisp_Object charset, unify_map, deunify;
8a73a704 1399{
3263d5a2
KH
1400 int id;
1401 struct charset *cs;
8f924df7 1402
3263d5a2
KH
1403 CHECK_CHARSET_GET_ID (charset, id);
1404 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1405 if (NILP (deunify)
1406 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1407 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1408 return Qnil;
dbbb237d 1409
3263d5a2 1410 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1411 if (NILP (deunify))
1412 {
ecca2aad
KH
1413 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1414 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1415 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1416 if (NILP (unify_map))
1417 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1418 else
ecca2aad
KH
1419 {
1420 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1421 signal_error ("Bad unify-map", unify_map);
1422 CHARSET_UNIFY_MAP (cs) = unify_map;
1423 }
1424 if (NILP (Vchar_unify_table))
1425 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1426 char_table_set_range (Vchar_unify_table,
1427 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1428 CHARSET_UNIFIED_P (cs) = 1;
1429 }
1430 else if (CHAR_TABLE_P (Vchar_unify_table))
1431 {
1432 int min_code = CHARSET_MIN_CODE (cs);
1433 int max_code = CHARSET_MAX_CODE (cs);
1434 int min_char = DECODE_CHAR (cs, min_code);
1435 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1436
dbbb237d
KH
1437 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1438 }
8f924df7 1439
3263d5a2 1440 return Qnil;
8a73a704
KH
1441}
1442
3fac5a51
KH
1443DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1444 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1445 doc: /*
d0cf2d48 1446Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1447DIMENSION is the number of bytes to represent a character: 1 or 2.
1448CHARS is the number of characters in a dimension: 94 or 96.
1449
1450This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1451If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1452return nil. */)
1453 (dimension, chars)
3fac5a51
KH
1454 Lisp_Object dimension, chars;
1455{
1456 int final_char;
1457
b7826503
PJ
1458 CHECK_NUMBER (dimension);
1459 CHECK_NUMBER (chars);
3263d5a2
KH
1460 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1461 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1462 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1463 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1464 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1465 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1466 break;
3fac5a51
KH
1467 return (final_char <= '?' ? make_number (final_char) : Qnil);
1468}
1469
3263d5a2
KH
1470static void
1471check_iso_charset_parameter (dimension, chars, final_char)
1472 Lisp_Object dimension, chars, final_char;
4ed46869 1473{
3263d5a2
KH
1474 CHECK_NATNUM (dimension);
1475 CHECK_NATNUM (chars);
1476 CHECK_NATNUM (final_char);
4ed46869 1477
3263d5a2
KH
1478 if (XINT (dimension) > 3)
1479 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1480 if (XINT (chars) != 94 && XINT (chars) != 96)
1481 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1482 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1483 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1484}
1485
1486
1487DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1488 4, 4, 0,
cefd8c4f
KH
1489 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1490
1491On decoding by an ISO-2022 base coding system, when a charset
1492specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1493if CHARSET is designated instead. */)
3263d5a2
KH
1494 (dimension, chars, final_char, charset)
1495 Lisp_Object dimension, chars, final_char, charset;
1496{
1497 int id;
82215ce9 1498 int chars_flag;
4ed46869 1499
3263d5a2
KH
1500 CHECK_CHARSET_GET_ID (charset, id);
1501 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1502 chars_flag = XINT (chars) == 96;
1503 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1504 return Qnil;
1505}
1506
3263d5a2 1507
2e344af3
KH
1508/* Return information about charsets in the text at PTR of NBYTES
1509 bytes, which are NCHARS characters. The value is:
f6302ac9 1510
cfe34140 1511 0: Each character is represented by one byte. This is always
3263d5a2
KH
1512 true for a unibyte string. For a multibyte string, true if
1513 it contains only ASCII characters.
1514
28c026cd
DL
1515 1: No charsets other than ascii, control-1, and latin-1 are
1516 found.
1d67c29b 1517
3263d5a2
KH
1518 2: Otherwise.
1519*/
4ed46869
KH
1520
1521int
3263d5a2
KH
1522string_xstring_p (string)
1523 Lisp_Object string;
4ed46869 1524{
8f924df7
KH
1525 const unsigned char *p = SDATA (string);
1526 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1527
8f924df7 1528 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1529 return 0;
1530
3263d5a2 1531 while (p < endp)
0282eb69 1532 {
3263d5a2 1533 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1534
3cc67a4d 1535 if (c >= 0x100)
3263d5a2 1536 return 2;
0282eb69 1537 }
3263d5a2
KH
1538 return 1;
1539}
05505664 1540
05505664 1541
3263d5a2 1542/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1543
3cc67a4d
KH
1544 CHARSETS is a vector. If Nth element is non-nil, it means the
1545 charset whose id is N is already found.
2e344af3 1546
3263d5a2 1547 It may lookup a translation table TABLE if supplied. */
2e344af3 1548
3263d5a2 1549static void
3cc67a4d 1550find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
dbbb237d 1551 const unsigned char *ptr;
42ca828e 1552 EMACS_INT nchars, nbytes;
3263d5a2 1553 Lisp_Object charsets, table;
3cc67a4d 1554 int multibyte;
3263d5a2 1555{
dbbb237d 1556 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1557
1558 if (nchars == nbytes)
3263d5a2 1559 {
3cc67a4d
KH
1560 if (multibyte)
1561 ASET (charsets, charset_ascii, Qt);
1562 else
1563 while (ptr < pend)
1564 {
1565 int c = *ptr++;
1566
1567 if (!NILP (table))
1568 c = translate_char (table, c);
1569 if (ASCII_BYTE_P (c))
1570 ASET (charsets, charset_ascii, Qt);
1571 else
1572 ASET (charsets, charset_eight_bit, Qt);
1573 }
1574 }
1575 else
1576 {
1577 while (ptr < pend)
3263d5a2 1578 {
3cc67a4d
KH
1579 int c = STRING_CHAR_ADVANCE (ptr);
1580 struct charset *charset;
3263d5a2 1581
3cc67a4d
KH
1582 if (!NILP (table))
1583 c = translate_char (table, c);
1584 charset = CHAR_CHARSET (c);
1585 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1586 }
4ed46869 1587 }
4ed46869
KH
1588}
1589
1590DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1591 2, 3, 0,
fdb82f93
PJ
1592 doc: /* Return a list of charsets in the region between BEG and END.
1593BEG and END are buffer positions.
1594Optional arg TABLE if non-nil is a translation table to look up.
1595
fdb82f93
PJ
1596If the current buffer is unibyte, the returned list may contain
1597only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1598 (beg, end, table)
23d2a7f1 1599 Lisp_Object beg, end, table;
4ed46869 1600{
3263d5a2 1601 Lisp_Object charsets;
42ca828e
DL
1602 EMACS_INT from, from_byte, to, stop, stop_byte;
1603 int i;
4ed46869 1604 Lisp_Object val;
3cc67a4d 1605 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1606
1607 validate_region (&beg, &end);
1608 from = XFASTINT (beg);
1609 stop = to = XFASTINT (end);
6ae1f27e 1610
4ed46869 1611 if (from < GPT && GPT < to)
6ae1f27e
RS
1612 {
1613 stop = GPT;
1614 stop_byte = GPT_BYTE;
1615 }
1616 else
1617 stop_byte = CHAR_TO_BYTE (stop);
1618
1619 from_byte = CHAR_TO_BYTE (from);
1620
3263d5a2 1621 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1622 while (1)
1623 {
3263d5a2 1624 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1625 stop_byte - from_byte, charsets, table,
1626 multibyte);
4ed46869 1627 if (stop < to)
6ae1f27e
RS
1628 {
1629 from = stop, from_byte = stop_byte;
1630 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1631 }
4ed46869
KH
1632 else
1633 break;
1634 }
6ae1f27e 1635
4ed46869 1636 val = Qnil;
3263d5a2 1637 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1638 if (!NILP (AREF (charsets, i)))
3263d5a2 1639 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1640 return val;
1641}
1642
1643DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1644 1, 2, 0,
fdb82f93
PJ
1645 doc: /* Return a list of charsets in STR.
1646Optional arg TABLE if non-nil is a translation table to look up.
1647
fdb82f93 1648If STR is unibyte, the returned list may contain
3263d5a2 1649only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1650 (str, table)
23d2a7f1 1651 Lisp_Object str, table;
4ed46869 1652{
3263d5a2 1653 Lisp_Object charsets;
4ed46869
KH
1654 int i;
1655 Lisp_Object val;
1656
b7826503 1657 CHECK_STRING (str);
87b089ad 1658
3263d5a2 1659 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1660 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1661 charsets, table,
1662 STRING_MULTIBYTE (str));
4ed46869 1663 val = Qnil;
3263d5a2 1664 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1665 if (!NILP (AREF (charsets, i)))
3263d5a2 1666 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1667 return val;
1668}
2e344af3 1669
4ed46869 1670\f
3263d5a2 1671
ecca2aad
KH
1672/* Return a unified character code for C (>= 0x110000). VAL is a
1673 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1674 charset symbol. */
1675int
1676maybe_unify_char (c, val)
1677 int c;
1678 Lisp_Object val;
1679{
1680 struct charset *charset;
1681
1682 if (INTEGERP (val))
1683 return XINT (val);
1684 if (NILP (val))
1685 return c;
1686
1687 CHECK_CHARSET_GET_CHARSET (val, charset);
1688 load_charset (charset, 1);
1689 if (! inhibit_load_charset_map)
1690 {
1691 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1692 if (! NILP (val))
1693 c = XINT (val);
1694 }
1695 else
1696 {
1697 int code_index = c - CHARSET_CODE_OFFSET (charset);
1698 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1699
1700 if (unified > 0)
1701 c = unified;
1702 }
1703 return c;
1704}
1705
1706
3263d5a2
KH
1707/* Return a character correponding to the code-point CODE of
1708 CHARSET. */
1709
1710int
1711decode_char (charset, code)
1712 struct charset *charset;
1713 unsigned code;
4ed46869 1714{
3263d5a2
KH
1715 int c, char_index;
1716 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1717
3263d5a2
KH
1718 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1719 return -1;
4ed46869 1720
374c5cfd 1721 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1722 {
374c5cfd
KH
1723 Lisp_Object subset_info;
1724
1725 subset_info = CHARSET_SUBSET (charset);
1726 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1727 code -= XINT (AREF (subset_info, 3));
1728 if (code < XFASTINT (AREF (subset_info, 1))
1729 || code > XFASTINT (AREF (subset_info, 2)))
1730 c = -1;
1731 else
1732 c = DECODE_CHAR (charset, code);
2e344af3 1733 }
374c5cfd 1734 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1735 {
3263d5a2 1736 Lisp_Object parents;
4ed46869 1737
374c5cfd 1738 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1739 c = -1;
1740 for (; CONSP (parents); parents = XCDR (parents))
1741 {
1742 int id = XINT (XCAR (XCAR (parents)));
1743 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1744 unsigned this_code = code - code_offset;
4ed46869 1745
3263d5a2
KH
1746 charset = CHARSET_FROM_ID (id);
1747 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1748 break;
1749 }
1750 }
1751 else
ac4137cc 1752 {
3263d5a2 1753 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1754 if (char_index < 0)
1755 return -1;
4ed46869 1756
3263d5a2 1757 if (method == CHARSET_METHOD_MAP)
ac4137cc 1758 {
3263d5a2 1759 Lisp_Object decoder;
4ed46869 1760
3263d5a2
KH
1761 decoder = CHARSET_DECODER (charset);
1762 if (! VECTORP (decoder))
ecca2aad
KH
1763 {
1764 load_charset (charset, 1);
1765 decoder = CHARSET_DECODER (charset);
1766 }
1767 if (VECTORP (decoder))
1768 c = XINT (AREF (decoder, char_index));
1769 else
1770 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1771 }
ecca2aad 1772 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1773 {
3263d5a2 1774 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1775 if (CHARSET_UNIFIED_P (charset)
1776 && c > MAX_UNICODE_CHAR)
1777 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1778 }
1779 }
4ed46869 1780
3263d5a2 1781 return c;
90d7b74e
KH
1782}
1783
374c5cfd
KH
1784/* Variable used temporarily by the macro ENCODE_CHAR. */
1785Lisp_Object charset_work;
4ed46869 1786
3263d5a2 1787/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1788 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1789 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1790
3263d5a2
KH
1791unsigned
1792encode_char (charset, c)
1793 struct charset *charset;
9b6a601f 1794 int c;
9d3d8cba 1795{
3263d5a2
KH
1796 unsigned code;
1797 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1798
3263d5a2 1799 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1800 {
6809ca75 1801 Lisp_Object deunifier;
ecca2aad 1802 int code_index = -1;
4ed46869 1803
3263d5a2
KH
1804 deunifier = CHARSET_DEUNIFIER (charset);
1805 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1806 {
ecca2aad 1807 load_charset (charset, 2);
3263d5a2 1808 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1809 }
ecca2aad
KH
1810 if (CHAR_TABLE_P (deunifier))
1811 {
1812 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1813
1814 if (INTEGERP (deunified))
1815 code_index = XINT (deunified);
1816 }
1817 else
1818 {
1819 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1820 }
1821 if (code_index >= 0)
1822 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1823 }
9d3d8cba 1824
374c5cfd
KH
1825 if (method == CHARSET_METHOD_SUBSET)
1826 {
1827 Lisp_Object subset_info;
1828 struct charset *this_charset;
1829
1830 subset_info = CHARSET_SUBSET (charset);
1831 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1832 code = ENCODE_CHAR (this_charset, c);
1833 if (code == CHARSET_INVALID_CODE (this_charset)
1834 || code < XFASTINT (AREF (subset_info, 1))
1835 || code > XFASTINT (AREF (subset_info, 2)))
1836 return CHARSET_INVALID_CODE (charset);
1837 code += XINT (AREF (subset_info, 3));
1838 return code;
1839 }
9d3d8cba 1840
374c5cfd 1841 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1842 {
3263d5a2 1843 Lisp_Object parents;
d2665018 1844
374c5cfd 1845 parents = CHARSET_SUPERSET (charset);
3263d5a2 1846 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1847 {
3263d5a2
KH
1848 int id = XINT (XCAR (XCAR (parents)));
1849 int code_offset = XINT (XCDR (XCAR (parents)));
1850 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1851
3263d5a2 1852 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1853 if (code != CHARSET_INVALID_CODE (this_charset))
1854 return code + code_offset;
beeedaad 1855 }
3263d5a2
KH
1856 return CHARSET_INVALID_CODE (charset);
1857 }
1bcc1567 1858
15c85a88
KH
1859 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1860 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1861 return CHARSET_INVALID_CODE (charset);
1bcc1567 1862
3263d5a2 1863 if (method == CHARSET_METHOD_MAP)
3f62427c 1864 {
3263d5a2 1865 Lisp_Object encoder;
beeedaad 1866 Lisp_Object val;
9b6a601f 1867
3263d5a2
KH
1868 encoder = CHARSET_ENCODER (charset);
1869 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1870 {
1871 load_charset (charset, 2);
1872 encoder = CHARSET_ENCODER (charset);
1873 }
1874 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1875 {
1876 val = CHAR_TABLE_REF (encoder, c);
1877 if (NILP (val))
1878 return CHARSET_INVALID_CODE (charset);
1879 code = XINT (val);
1880 if (! CHARSET_COMPACT_CODES_P (charset))
1881 code = INDEX_TO_CODE_POINT (charset, code);
1882 }
1883 else
1884 {
1885 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1886 code = INDEX_TO_CODE_POINT (charset, code);
1887 }
3263d5a2 1888 }
820ee249 1889 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1890 {
ecca2aad
KH
1891 int code_index = c - CHARSET_CODE_OFFSET (charset);
1892
1893 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1894 }
8ac5a9cc 1895
3263d5a2 1896 return code;
4ed46869
KH
1897}
1898
4ed46869 1899
3263d5a2
KH
1900DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1901 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1902Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1903
3263d5a2 1904CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1905
3263d5a2 1906Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1907and CODE-POINT to a character. Currently not supported and just ignored. */)
3263d5a2
KH
1908 (charset, code_point, restriction)
1909 Lisp_Object charset, code_point, restriction;
4ed46869 1910{
3263d5a2
KH
1911 int c, id;
1912 unsigned code;
1913 struct charset *charsetp;
859f2b3c 1914
3263d5a2
KH
1915 CHECK_CHARSET_GET_ID (charset, id);
1916 if (CONSP (code_point))
1917 {
8f924df7
KH
1918 CHECK_NATNUM_CAR (code_point);
1919 CHECK_NATNUM_CDR (code_point);
69f8de5b 1920 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1921 }
4ed46869
KH
1922 else
1923 {
3263d5a2
KH
1924 CHECK_NATNUM (code_point);
1925 code = XINT (code_point);
4ed46869 1926 }
3263d5a2
KH
1927 charsetp = CHARSET_FROM_ID (id);
1928 c = DECODE_CHAR (charsetp, code);
1929 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1930}
1931
859f2b3c 1932
3263d5a2
KH
1933DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1934 doc: /* Encode the character CH into a code-point of CHARSET.
1935Return nil if CHARSET doesn't include CH.
17e7ef1b 1936
d0cf2d48 1937Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2
KH
1938code-point in CCS. Currently not supported and just ignored. */)
1939 (ch, charset, restriction)
1940 Lisp_Object ch, charset, restriction;
4ed46869 1941{
16fed1fc 1942 int id;
3263d5a2
KH
1943 unsigned code;
1944 struct charset *charsetp;
046b1f03 1945
3263d5a2
KH
1946 CHECK_CHARSET_GET_ID (charset, id);
1947 CHECK_NATNUM (ch);
3263d5a2 1948 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1949 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1950 if (code == CHARSET_INVALID_CODE (charsetp))
1951 return Qnil;
1952 if (code > 0x7FFFFFF)
1953 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1954 return make_number (code);
beeedaad
KH
1955}
1956
beeedaad 1957
b121a744
KH
1958DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1959 doc:
1960 /* Return a character of CHARSET whose position codes are CODEn.
1961
1962CODE1 through CODE4 are optional, but if you don't supply sufficient
1963position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1964is specified. */)
b121a744
KH
1965 (charset, code1, code2, code3, code4)
1966 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1967{
3263d5a2
KH
1968 int id, dimension;
1969 struct charset *charsetp;
b121a744
KH
1970 unsigned code;
1971 int c;
87b089ad 1972
3263d5a2
KH
1973 CHECK_CHARSET_GET_ID (charset, id);
1974 charsetp = CHARSET_FROM_ID (id);
4ed46869 1975
b121a744
KH
1976 dimension = CHARSET_DIMENSION (charsetp);
1977 if (NILP (code1))
d47073ca
KH
1978 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1979 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1980 else
859f2b3c 1981 {
b121a744
KH
1982 CHECK_NATNUM (code1);
1983 if (XFASTINT (code1) >= 0x100)
1984 args_out_of_range (make_number (0xFF), code1);
1985 code = XFASTINT (code1);
859f2b3c 1986
b0a1e45e 1987 if (dimension > 1)
beeedaad 1988 {
b121a744 1989 code <<= 8;
b0a1e45e
KH
1990 if (NILP (code2))
1991 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1992 else
b121a744 1993 {
b0a1e45e
KH
1994 CHECK_NATNUM (code2);
1995 if (XFASTINT (code2) >= 0x100)
1996 args_out_of_range (make_number (0xFF), code2);
1997 code |= XFASTINT (code2);
b121a744 1998 }
99529c2c 1999
b0a1e45e 2000 if (dimension > 2)
b121a744
KH
2001 {
2002 code <<= 8;
b0a1e45e
KH
2003 if (NILP (code3))
2004 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
2005 else
2006 {
b0a1e45e
KH
2007 CHECK_NATNUM (code3);
2008 if (XFASTINT (code3) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code3);
2010 code |= XFASTINT (code3);
2011 }
2012
2013 if (dimension > 3)
2014 {
2015 code <<= 8;
2016 if (NILP (code4))
2017 code |= charsetp->code_space[0];
2018 else
2019 {
2020 CHECK_NATNUM (code4);
2021 if (XFASTINT (code4) >= 0x100)
2022 args_out_of_range (make_number (0xFF), code4);
2023 code |= XFASTINT (code4);
2024 }
b121a744
KH
2025 }
2026 }
beeedaad 2027 }
859f2b3c 2028 }
beeedaad 2029
b121a744
KH
2030 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2031 code &= 0x7F7F7F7F;
2032 c = DECODE_CHAR (charsetp, code);
2033 if (c < 0)
2034 error ("Invalid code(s)");
3263d5a2 2035 return make_number (c);
4ed46869
KH
2036}
2037
beeedaad 2038
3263d5a2
KH
2039/* Return the first charset in CHARSET_LIST that contains C.
2040 CHARSET_LIST is a list of charset IDs. If it is nil, use
2041 Vcharset_ordered_list. */
beeedaad 2042
3263d5a2
KH
2043struct charset *
2044char_charset (c, charset_list, code_return)
2045 int c;
2046 Lisp_Object charset_list;
2047 unsigned *code_return;
2e344af3 2048{
8a1816bb
KH
2049 int maybe_null = 0;
2050
3263d5a2
KH
2051 if (NILP (charset_list))
2052 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2053 else
2054 maybe_null = 1;
beeedaad 2055
6c652beb 2056 while (CONSP (charset_list))
2e344af3 2057 {
3263d5a2
KH
2058 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2059 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2060
3263d5a2 2061 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2062 {
3263d5a2
KH
2063 if (code_return)
2064 *code_return = code;
2065 return charset;
3f62427c 2066 }
3263d5a2 2067 charset_list = XCDR (charset_list);
6c652beb
KH
2068 if (c <= MAX_UNICODE_CHAR
2069 && EQ (charset_list, Vcharset_non_preferred_head))
2070 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2071 }
8a1816bb
KH
2072 return (maybe_null ? NULL
2073 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2074 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2075}
2076
2e344af3 2077
3263d5a2 2078DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2079 doc:
d0cf2d48 2080 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2081The charset is decided by the current priority order of charsets.
2082A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2083CH in the charset. */)
3263d5a2
KH
2084 (ch)
2085 Lisp_Object ch;
4ed46869 2086{
3263d5a2
KH
2087 struct charset *charset;
2088 int c, dimension;
2089 unsigned code;
4ed46869
KH
2090 Lisp_Object val;
2091
3263d5a2
KH
2092 CHECK_CHARACTER (ch);
2093 c = XFASTINT (ch);
2094 charset = CHAR_CHARSET (c);
2095 if (! charset)
3cc67a4d 2096 abort ();
3263d5a2
KH
2097 code = ENCODE_CHAR (charset, c);
2098 if (code == CHARSET_INVALID_CODE (charset))
2099 abort ();
2100 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2101 for (val = Qnil; dimension > 0; dimension--)
2102 {
2103 val = Fcons (make_number (code & 0xFF), val);
2104 code >>= 8;
2105 }
3263d5a2 2106 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2107}
2108
740f080d 2109
4cb75c4b
KH
2110DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2111 doc: /* Return the charset of highest priority that contains CH.
2112If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2113from which to find the charset. It may also be a coding system. In
2114that case, find the charset from what supported by that coding system. */)
2115 (ch, restriction)
2116 Lisp_Object ch, restriction;
4ed46869 2117{
3263d5a2 2118 struct charset *charset;
4ed46869 2119
3263d5a2 2120 CHECK_CHARACTER (ch);
4cb75c4b
KH
2121 if (NILP (restriction))
2122 charset = CHAR_CHARSET (XINT (ch));
2123 else
2124 {
2125 Lisp_Object charset_list;
2126
2127 if (CONSP (restriction))
2128 {
2129 for (charset_list = Qnil; CONSP (restriction);
2130 restriction = XCDR (restriction))
2131 {
2132 int id;
2133
2134 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2135 charset_list = Fcons (make_number (id), charset_list);
2136 }
2137 charset_list = Fnreverse (charset_list);
2138 }
2139 else
2140 charset_list = coding_system_charset_list (restriction);
2141 charset = char_charset (XINT (ch), charset_list, NULL);
2142 if (! charset)
2143 return Qnil;
2144 }
3263d5a2 2145 return (CHARSET_NAME (charset));
4ed46869
KH
2146}
2147
17e7ef1b 2148
3263d5a2
KH
2149DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2150 doc: /*
2151Return charset of a character in the current buffer at position POS.
2152If POS is nil, it defauls to the current point.
2153If POS is out of range, the value is nil. */)
2154 (pos)
2155 Lisp_Object pos;
2e344af3 2156{
3263d5a2
KH
2157 Lisp_Object ch;
2158 struct charset *charset;
046b1f03 2159
3263d5a2
KH
2160 ch = Fchar_after (pos);
2161 if (! INTEGERP (ch))
2162 return ch;
2163 charset = CHAR_CHARSET (XINT (ch));
2164 return (CHARSET_NAME (charset));
6ae1f27e 2165}
9036eb45 2166
87b089ad 2167
3263d5a2
KH
2168DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2169 doc: /*
2170Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2171
2172ISO 2022's designation sequence (escape sequence) distinguishes charsets
2173by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2174whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2175See the documentation of the function `charset-info' for the meanings of
2176DIMENSION, CHARS, and FINAL-CHAR. */)
2177 (dimension, chars, final_char)
2178 Lisp_Object dimension, chars, final_char;
6ae1f27e 2179{
3263d5a2 2180 int id;
82215ce9 2181 int chars_flag;
a8a35e61 2182
3263d5a2 2183 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2184 chars_flag = XFASTINT (chars) == 96;
2185 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2186 XFASTINT (final_char));
2187 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2188}
2189
87b089ad 2190
3263d5a2
KH
2191DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2192 0, 0, 0,
2193 doc: /*
ecca2aad
KH
2194Internal use only.
2195Clear temporary charset mapping tables.
2196It should be called only from temacs invoked for dumping. */)
3263d5a2 2197 ()
87b089ad 2198{
ecca2aad 2199 if (temp_charset_work)
87b089ad 2200 {
ecca2aad
KH
2201 free (temp_charset_work);
2202 temp_charset_work = NULL;
2e344af3 2203 }
2e344af3 2204
ecca2aad
KH
2205 if (CHAR_TABLE_P (Vchar_unify_table))
2206 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2207
3263d5a2 2208 return Qnil;
740f080d
KH
2209}
2210
8ddf5e57
DL
2211DEFUN ("charset-priority-list", Fcharset_priority_list,
2212 Scharset_priority_list, 0, 1, 0,
2213 doc: /* Return the list of charsets ordered by priority.
2214HIGHESTP non-nil means just return the highest priority one. */)
2215 (highestp)
2216 Lisp_Object highestp;
2e344af3 2217{
8ddf5e57 2218 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2219
8ddf5e57 2220 if (!NILP (highestp))
16fed1fc 2221 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2222
8ddf5e57 2223 while (!NILP (list))
2e344af3 2224 {
16fed1fc 2225 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2226 list = XCDR (list);
2e344af3 2227 }
8ddf5e57 2228 return Fnreverse (val);
2e344af3
KH
2229}
2230
8ddf5e57
DL
2231DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2232 1, MANY, 0,
2233 doc: /* Assign higher priority to the charsets given as arguments.
2234usage: (set-charset-priority &rest charsets) */)
2235 (nargs, args)
2236 int nargs;
4ed46869
KH
2237 Lisp_Object *args;
2238{
af7c60ca 2239 Lisp_Object new_head, old_list, arglist[2];
321c819c 2240 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2241 int i, id;
4ed46869 2242
8ddf5e57 2243 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2244 new_head = Qnil;
8ddf5e57 2245 for (i = 0; i < nargs; i++)
4ed46869 2246 {
8ddf5e57 2247 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2248 if (! NILP (Fmemq (make_number (id), old_list)))
2249 {
2250 old_list = Fdelq (make_number (id), old_list);
2251 new_head = Fcons (make_number (id), new_head);
2252 }
5729c92f 2253 }
8ddf5e57 2254 arglist[0] = Fnreverse (new_head);
6a9c90ec 2255 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2256 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2257 charset_ordered_list_tick++;
5729c92f 2258
6809ca75 2259 charset_unibyte = -1;
321c819c 2260 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2261 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2262 {
e77415b0 2263 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2264 list_2022 = Fcons (XCAR (old_list), list_2022);
2265 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2266 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2267 if (charset_unibyte < 0)
2268 {
2269 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2270
2271 if (CHARSET_DIMENSION (charset) == 1
2272 && CHARSET_ASCII_COMPATIBLE_P (charset)
2273 && CHARSET_MAX_CHAR (charset) >= 0x80)
2274 charset_unibyte = CHARSET_ID (charset);
2275 }
4ed46869 2276 }
321c819c
KH
2277 Viso_2022_charset_list = Fnreverse (list_2022);
2278 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2279 if (charset_unibyte < 0)
2280 charset_unibyte = charset_iso_8859_1;
4ed46869 2281
8ddf5e57 2282 return Qnil;
4ed46869
KH
2283}
2284
d5b33309
KH
2285DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2286 0, 1, 0,
2287 doc: /* Internal use only.
2288Return charset identification number of CHARSET. */)
2289 (charset)
2290 Lisp_Object charset;
4ed46869 2291{
d5b33309 2292 int id;
4ed46869 2293
d5b33309
KH
2294 CHECK_CHARSET_GET_ID (charset, id);
2295 return make_number (id);
4ed46869
KH
2296}
2297
4ed46869 2298\f
3263d5a2
KH
2299void
2300init_charset ()
4ed46869 2301{
c8f94403
GM
2302 Lisp_Object tempdir;
2303 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
6809ca75 2304 if (access ((char *) SDATA (tempdir), 0) < 0)
c8f94403
GM
2305 {
2306 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2307Emacs will not function correctly without the character map files.\n\
2308Please check your installation!\n",
2309 tempdir);
2310 /* TODO should this be a fatal error? (Bug#909) */
2311 }
2312
2313 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2314}
2315
4ed46869 2316
dfcf069d 2317void
4ed46869
KH
2318init_charset_once ()
2319{
2320 int i, j, k;
2321
3263d5a2
KH
2322 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2323 for (j = 0; j < ISO_MAX_CHARS; j++)
2324 for (k = 0; k < ISO_MAX_FINAL; k++)
2325 iso_charset_table[i][j][k] = -1;
4ed46869 2326
60383934 2327 for (i = 0; i < 256; i++)
3263d5a2 2328 emacs_mule_charset[i] = NULL;
4ed46869 2329
7c7dceee
KH
2330 charset_jisx0201_roman = -1;
2331 charset_jisx0208_1978 = -1;
2332 charset_jisx0208 = -1;
d32320c4 2333 charset_ksc5601 = -1;
4ed46869
KH
2334
2335 for (i = 0; i < 128; i++)
14e3d523 2336 unibyte_to_multibyte_table[i] = i;
4ed46869 2337 for (; i < 256; i++)
170e4589 2338 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
4ed46869
KH
2339}
2340
2341#ifdef emacs
2342
dfcf069d 2343void
4ed46869
KH
2344syms_of_charset ()
2345{
3263d5a2
KH
2346 DEFSYM (Qcharsetp, "charsetp");
2347
2348 DEFSYM (Qascii, "ascii");
2349 DEFSYM (Qunicode, "unicode");
6c652beb 2350 DEFSYM (Qemacs, "emacs");
2fe1edd1 2351 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2352 DEFSYM (Qiso_8859_1, "iso-8859-1");
2353
2354 DEFSYM (Qgl, "gl");
2355 DEFSYM (Qgr, "gr");
2356
3263d5a2
KH
2357 staticpro (&Vcharset_ordered_list);
2358 Vcharset_ordered_list = Qnil;
2359
2360 staticpro (&Viso_2022_charset_list);
2361 Viso_2022_charset_list = Qnil;
2362
2363 staticpro (&Vemacs_mule_charset_list);
2364 Vemacs_mule_charset_list = Qnil;
2365
3943ed76
KH
2366 /* Don't staticpro them here. It's done in syms_of_fns. */
2367 QCtest = intern (":test");
2368 Qeq = intern ("eq");
2369
3263d5a2 2370 staticpro (&Vcharset_hash_table);
8f924df7
KH
2371 {
2372 Lisp_Object args[2];
2373 args[0] = QCtest;
2374 args[1] = Qeq;
2375 Vcharset_hash_table = Fmake_hash_table (2, args);
2376 }
3263d5a2
KH
2377
2378 charset_table_size = 128;
2379 charset_table = ((struct charset *)
2380 xmalloc (sizeof (struct charset) * charset_table_size));
2381 charset_table_used = 0;
2382
3263d5a2
KH
2383 defsubr (&Scharsetp);
2384 defsubr (&Smap_charset_chars);
2385 defsubr (&Sdefine_charset_internal);
2386 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2387 defsubr (&Scharset_plist);
2388 defsubr (&Sset_charset_plist);
2389 defsubr (&Sunify_charset);
3fac5a51 2390 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2391 defsubr (&Sdeclare_equiv_charset);
2392 defsubr (&Sfind_charset_region);
2393 defsubr (&Sfind_charset_string);
3263d5a2
KH
2394 defsubr (&Sdecode_char);
2395 defsubr (&Sencode_char);
4ed46869 2396 defsubr (&Ssplit_char);
3263d5a2 2397 defsubr (&Smake_char);
4ed46869 2398 defsubr (&Schar_charset);
90d7b74e 2399 defsubr (&Scharset_after);
4ed46869 2400 defsubr (&Siso_charset);
3263d5a2 2401 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2402 defsubr (&Scharset_priority_list);
2403 defsubr (&Sset_charset_priority);
d5b33309 2404 defsubr (&Scharset_id_internal);
3263d5a2 2405
4beef065 2406 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2407 doc: /* *List of directories to search for charset map files. */);
4beef065 2408 Vcharset_map_path = Qnil;
4ed46869 2409
ecca2aad
KH
2410 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2411 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2412 inhibit_load_charset_map = 0;
2413
4ed46869 2414 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2415 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2416 Vcharset_list = Qnil;
2417
6a9c90ec
KH
2418 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2419 doc: /* ISO639 language mnemonic symbol for the current language environment.
2420If the current language environment is for multiple languages (e.g. "Latin-1"),
2421the value may be a list of mnemonics. */);
2422 Vcurrent_iso639_language = Qnil;
2423
2fe1edd1
KH
2424 charset_ascii
2425 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2426 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2427 charset_iso_8859_1
2428 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2429 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2430 charset_unicode
73fbf2d9 2431 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2432 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2433 charset_emacs
2434 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2435 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2436 charset_eight_bit
2437 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2438 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2439 MAX_5_BYTE_CHAR + 1);
6809ca75 2440 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2441}
2442
2443#endif /* emacs */
cefd8c4f
KH
2444
2445/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2446 (do not change this comment) */