Include coding.h and termhooks.h.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
76b6f707 3 2008, 2009 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
76b6f707 5 2005, 2006, 2007, 2008, 2009
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
4ed46869 34#include "lisp.h"
3263d5a2 35#include "character.h"
4ed46869
KH
36#include "charset.h"
37#include "coding.h"
fc6b09bf 38#include "disptab.h"
3263d5a2 39#include "buffer.h"
4ed46869 40
04c2f2c5 41/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 42
3263d5a2 43 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 44 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
4ed46869 48
3263d5a2
KH
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
04c2f2c5 52 charset_table as a struct charset.
4ed46869 53
3263d5a2 54*/
4ed46869 55
3263d5a2
KH
56/* List of all charsets. This variable is used only from Emacs
57 Lisp. */
4ed46869 58Lisp_Object Vcharset_list;
4ed46869 59
3263d5a2
KH
60/* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62Lisp_Object Vcharset_hash_table;
4ed46869 63
3263d5a2
KH
64/* Table of struct charset. */
65struct charset *charset_table;
4ed46869 66
3263d5a2 67static int charset_table_size;
5af5dd92 68static int charset_table_used;
4ed46869 69
3263d5a2 70Lisp_Object Qcharsetp;
4ed46869 71
3263d5a2
KH
72/* Special charset symbols. */
73Lisp_Object Qascii;
2fe1edd1 74Lisp_Object Qeight_bit;
3263d5a2
KH
75Lisp_Object Qiso_8859_1;
76Lisp_Object Qunicode;
6c652beb 77Lisp_Object Qemacs;
4ed46869 78
3263d5a2
KH
79/* The corresponding charsets. */
80int charset_ascii;
2fe1edd1 81int charset_eight_bit;
3263d5a2
KH
82int charset_iso_8859_1;
83int charset_unicode;
6c652beb 84int charset_emacs;
b0e3cf2b 85
7c7dceee
KH
86/* The other special charsets. */
87int charset_jisx0201_roman;
88int charset_jisx0208_1978;
89int charset_jisx0208;
c1a08b4c 90
3263d5a2
KH
91/* Value of charset attribute `charset-iso-plane'. */
92Lisp_Object Qgl, Qgr;
c1a08b4c 93
d1a04588
KH
94/* Charset of unibyte characters. */
95int charset_unibyte;
4ed46869 96
3263d5a2
KH
97/* List of charsets ordered by the priority. */
98Lisp_Object Vcharset_ordered_list;
4ed46869 99
6a9c90ec
KH
100/* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102Lisp_Object Vcharset_non_preferred_head;
103
dbbb237d 104/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 105 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
106 -1. */
107unsigned short charset_ordered_list_tick;
4ed46869 108
3263d5a2
KH
109/* List of iso-2022 charsets. */
110Lisp_Object Viso_2022_charset_list;
35e623fb 111
3263d5a2
KH
112/* List of emacs-mule charsets. */
113Lisp_Object Vemacs_mule_charset_list;
114
115struct charset *emacs_mule_charset[256];
4ed46869
KH
116
117/* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
119int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
4beef065 121Lisp_Object Vcharset_map_path;
3263d5a2 122
ecca2aad
KH
123/* If nonzero, don't load charset maps. */
124int inhibit_load_charset_map;
3263d5a2 125
6a9c90ec
KH
126Lisp_Object Vcurrent_iso639_language;
127
64165ae2
DL
128/* Defined in chartab.c */
129extern void
130map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
69f8de5b
KH
135#define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
820ee249
KH
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
3263d5a2
KH
150 : -1)
151
152
153/* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
820ee249
KH
156#define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
4cf9710d 169
ecca2aad
KH
170/* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
8a73a704 172
ecca2aad
KH
173static struct
174{
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203} *temp_charset_work;
204
205#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
046b1f03 227\f
93bcb785 228
e9ce014c
KH
229/* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
3263d5a2 231int charset_map_loaded;
4ed46869 232
e9ce014c 233struct charset_map_entries
4ed46869 234{
e9ce014c
KH
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240};
241
ecca2aad
KH
242/* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
4cf9710d 251
ecca2aad
KH
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 257
ecca2aad 258 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 259
ecca2aad
KH
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265*/
4ed46869 266
3263d5a2 267static void
e9ce014c 268load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 269 struct charset *charset;
e9ce014c
KH
270 struct charset_map_entries *entries;
271 int n_entries;
3263d5a2 272 int control_flag;
4ed46869 273{
3263d5a2 274 Lisp_Object vec, table;
3263d5a2
KH
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
3263d5a2 278 int i;
3263d5a2 279 unsigned char *fast_map = charset->fast_map;
99529c2c 280
e9ce014c
KH
281 if (n_entries <= 0)
282 return;
283
ecca2aad 284 if (control_flag)
8ac5a9cc 285 {
ecca2aad
KH
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 293
ecca2aad
KH
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
3263d5a2 333 charset_map_loaded = 1;
2e344af3 334 }
6662e69b 335
e9ce014c 336 min_char = max_char = entries->entry[0].c;
3263d5a2 337 nonascii_min_char = MAX_CHAR;
e9ce014c 338 for (i = 0; i < n_entries; i++)
2e344af3 339 {
e9ce014c 340 unsigned from, to;
3b4f4446
KH
341 int from_index, to_index;
342 int from_c, to_c;
e9ce014c 343 int idx = i % 0x10000;
3263d5a2 344
e9ce014c
KH
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
3b4f4446
KH
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
6662e69b 352 {
3b4f4446
KH
353 to_index = from_index;
354 to_c = from_c;
6662e69b 355 }
3b4f4446 356 else
6662e69b 357 {
3b4f4446
KH
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
6662e69b 360 }
3b4f4446
KH
361 if (from_index < 0 || to_index < 0)
362 continue;
3263d5a2 363
ecca2aad
KH
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
6662e69b 368
ecca2aad
KH
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
3b4f4446
KH
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
177c0ea7 418
ecca2aad
KH
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 421 }
8ac5a9cc 422 }
3263d5a2 423
ecca2aad 424 if (control_flag == 0)
4ed46869 425 {
3263d5a2
KH
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 429 }
ecca2aad
KH
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
4ed46869
KH
435}
436
12bcae05 437
3263d5a2
KH
438/* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
12bcae05 440
3263d5a2
KH
441static INLINE unsigned
442read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
12bcae05 445{
3263d5a2
KH
446 int c;
447 unsigned n;
12bcae05 448
3263d5a2
KH
449 while ((c = getc (fp)) != EOF)
450 {
69f8de5b 451 if (c == '#')
3263d5a2
KH
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
8f924df7 460 }
3263d5a2
KH
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
e9ce014c
KH
475 if (c != EOF)
476 ungetc (c, fp);
3263d5a2
KH
477 return n;
478}
12bcae05 479
537efd8d 480
3263d5a2 481/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
482 Each line of MAPFILE has this form
483 0xAAAA 0xCCCC
484 where 0xAAAA is a code-point and 0xCCCC is the corresponding
485 character code, or this form
486 0xAAAA-0xBBBB 0xCCCC
487 where 0xAAAA and 0xBBBB are code-points specifying a range, and
488 0xCCCC is the first character code of the range.
4ed46869 489
3263d5a2
KH
490 The returned vector has this form:
491 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
492 where CODE1 is a code-point or a cons of code-points specifying a
493 range. */
4ed46869 494
c449997d 495extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
4ed46869 496
e9ce014c
KH
497static void
498load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
499 struct charset *charset;
500 Lisp_Object mapfile;
e9ce014c 501 int control_flag;
4ed46869 502{
e9ce014c
KH
503 unsigned min_code = CHARSET_MIN_CODE (charset);
504 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
505 int fd;
506 FILE *fp;
3263d5a2
KH
507 int eof;
508 Lisp_Object suffixes;
e9ce014c
KH
509 struct charset_map_entries *head, *entries;
510 int n_entries;
4ed46869 511
3263d5a2
KH
512 suffixes = Fcons (build_string (".map"),
513 Fcons (build_string (".TXT"), Qnil));
4ed46869 514
4beef065 515 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
3263d5a2
KH
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 {
519 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 520 return;
3263d5a2 521 }
4ed46869 522
e9ce014c
KH
523 head = entries = ((struct charset_map_entries *)
524 alloca (sizeof (struct charset_map_entries)));
525 n_entries = 0;
3263d5a2
KH
526 eof = 0;
527 while (1)
528 {
e9ce014c
KH
529 unsigned from, to;
530 int c;
531 int idx;
4ed46869 532
e9ce014c 533 from = read_hex (fp, &eof);
3263d5a2
KH
534 if (eof)
535 break;
e9ce014c
KH
536 if (getc (fp) == '-')
537 to = read_hex (fp, &eof);
538 else
539 to = from;
540 c = (int) read_hex (fp, &eof);
ac4137cc 541
e9ce014c
KH
542 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
543 continue;
ac4137cc 544
e9ce014c 545 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 546 {
e9ce014c
KH
547 entries->next = ((struct charset_map_entries *)
548 alloca (sizeof (struct charset_map_entries)));
549 entries = entries->next;
3263d5a2 550 }
e9ce014c
KH
551 idx = n_entries % 0x10000;
552 entries->entry[idx].from = from;
553 entries->entry[idx].to = to;
554 entries->entry[idx].c = c;
555 n_entries++;
3263d5a2
KH
556 }
557 fclose (fp);
558 close (fd);
177c0ea7 559
e9ce014c 560 load_charset_map (charset, head, n_entries, control_flag);
4ed46869
KH
561}
562
e9ce014c
KH
563static void
564load_charset_map_from_vector (charset, vec, control_flag)
565 struct charset *charset;
566 Lisp_Object vec;
567 int control_flag;
23d2a7f1 568{
e9ce014c
KH
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
23d2a7f1 575
e9ce014c 576 if (len % 2 == 1)
3263d5a2 577 {
e9ce014c
KH
578 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
579 return;
3263d5a2 580 }
35e623fb 581
e9ce014c
KH
582 head = entries = ((struct charset_map_entries *)
583 alloca (sizeof (struct charset_map_entries)));
584 n_entries = 0;
585 for (i = 0; i < len; i += 2)
35e623fb 586 {
e9ce014c
KH
587 Lisp_Object val, val2;
588 unsigned from, to;
589 int c;
590 int idx;
d2665018 591
e9ce014c
KH
592 val = AREF (vec, i);
593 if (CONSP (val))
bbf12bb3 594 {
e9ce014c
KH
595 val2 = XCDR (val);
596 val = XCAR (val);
597 CHECK_NATNUM (val);
598 CHECK_NATNUM (val2);
599 from = XFASTINT (val);
600 to = XFASTINT (val2);
bbf12bb3 601 }
e9ce014c 602 else
bbf12bb3 603 {
e9ce014c
KH
604 CHECK_NATNUM (val);
605 from = to = XFASTINT (val);
bbf12bb3 606 }
e9ce014c
KH
607 val = AREF (vec, i + 1);
608 CHECK_NATNUM (val);
609 c = XFASTINT (val);
76d7b829 610
e9ce014c
KH
611 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
612 continue;
76d7b829 613
dcc694d7 614 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c
KH
615 {
616 entries->next = ((struct charset_map_entries *)
617 alloca (sizeof (struct charset_map_entries)));
618 entries = entries->next;
619 }
620 idx = n_entries % 0x10000;
621 entries->entry[idx].from = from;
622 entries->entry[idx].to = to;
623 entries->entry[idx].c = c;
624 n_entries++;
625 }
76d7b829 626
e9ce014c 627 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
628}
629
ecca2aad
KH
630
631/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
632 map it is (see the comment of load_charset_map for the detail). */
633
3263d5a2 634static void
ecca2aad 635load_charset (charset, control_flag)
3263d5a2 636 struct charset *charset;
ecca2aad 637 int control_flag;
76d7b829 638{
ecca2aad 639 Lisp_Object map;
76d7b829 640
ecca2aad
KH
641 if (inhibit_load_charset_map
642 && temp_charset_work
643 && charset == temp_charset_work->current
644 && (control_flag == 2 == temp_charset_work->for_encoder))
645 return;
646
647 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
648 map = CHARSET_MAP (charset);
649 else if (CHARSET_UNIFIED_P (charset))
650 map = CHARSET_UNIFY_MAP (charset);
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 655}
76d7b829 656
3263d5a2
KH
657
658DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
659 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
660 (object)
661 Lisp_Object object;
23d2a7f1 662{
3263d5a2 663 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
664}
665
4ed46869 666
ecca2aad
KH
667void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned from, unsigned to));
670
671void
672map_charset_for_dump (c_function, function, arg, from, to)
673 void (*c_function) (Lisp_Object, Lisp_Object);
674 Lisp_Object function, arg;
675 unsigned from, to;
676{
677 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
678 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
679 Lisp_Object range;
680 int c, stop;
681 struct gcpro gcpro1;
682
683 range = Fcons (Qnil, Qnil);
684 GCPRO1 (range);
685
686 c = temp_charset_work->min_char;
687 stop = (temp_charset_work->max_char < 0x20000
688 ? temp_charset_work->max_char : 0xFFFF);
689
690 while (1)
691 {
692 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
693
694 if (index >= from_idx && index <= to_idx)
695 {
696 if (NILP (XCAR (range)))
697 XSETCAR (range, make_number (c));
698 }
699 else if (! NILP (XCAR (range)))
700 {
701 XSETCDR (range, make_number (c - 1));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 XSETCAR (range, Qnil);
707 }
708 if (c == stop)
709 {
710 if (c == temp_charset_work->max_char)
711 {
712 if (! NILP (XCAR (range)))
713 {
714 XSETCDR (range, make_number (c));
715 if (c_function)
716 (*c_function) (arg, range);
717 else
718 call2 (function, range, arg);
719 }
720 break;
721 }
722 c = 0x1FFFF;
723 stop = temp_charset_work->max_char;
724 }
725 c++;
726 }
727}
728
4ed46869 729void
374c5cfd
KH
730map_charset_chars (c_function, function, arg,
731 charset, from, to)
732 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
733 Lisp_Object function, arg;
734 struct charset *charset;
735 unsigned from, to;
4ed46869 736{
3263d5a2 737 Lisp_Object range;
374c5cfd 738 int partial;
3263d5a2 739
374c5cfd
KH
740 partial = (from > CHARSET_MIN_CODE (charset)
741 || to < CHARSET_MAX_CODE (charset));
742
3263d5a2 743 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 744 {
374c5cfd
KH
745 int from_idx = CODE_POINT_TO_INDEX (charset, from);
746 int to_idx = CODE_POINT_TO_INDEX (charset, to);
747 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
748 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
749
ecca2aad
KH
750 if (CHARSET_UNIFIED_P (charset))
751 {
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_DEUNIFIER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
760 }
761
374c5cfd 762 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 763 if (NILP (function))
5af5dd92 764 (*c_function) (arg, range);
3263d5a2
KH
765 else
766 call2 (function, range, arg);
c83ef371 767 }
3263d5a2
KH
768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
769 {
770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_ENCODER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 778 }
374c5cfd 779 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 780 {
374c5cfd
KH
781 Lisp_Object subset_info;
782 int offset;
783
784 subset_info = CHARSET_SUBSET (charset);
785 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
786 offset = XINT (AREF (subset_info, 3));
787 from -= offset;
788 if (from < XFASTINT (AREF (subset_info, 1)))
789 from = XFASTINT (AREF (subset_info, 1));
790 to -= offset;
791 if (to > XFASTINT (AREF (subset_info, 2)))
792 to = XFASTINT (AREF (subset_info, 2));
793 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 794 }
374c5cfd
KH
795 else /* i.e. CHARSET_METHOD_SUPERSET */
796 {
797 Lisp_Object parents;
4ed46869 798
374c5cfd
KH
799 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
800 parents = XCDR (parents))
bbf12bb3 801 {
374c5cfd
KH
802 int offset;
803 unsigned this_from, this_to;
804
805 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
806 offset = XINT (XCDR (XCAR (parents)));
807 this_from = from - offset;
808 this_to = to - offset;
809 if (this_from < CHARSET_MIN_CODE (charset))
810 this_from = CHARSET_MIN_CODE (charset);
811 if (this_to > CHARSET_MAX_CODE (charset))
812 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
813 map_charset_chars (c_function, function, arg, charset,
814 this_from, this_to);
bbf12bb3 815 }
35e623fb 816 }
4ed46869
KH
817}
818
374c5cfd 819DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 820 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 821FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 822argument ARG.
4ed46869 823
374c5cfd
KH
824RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
825characters contained in CHARSET.
4ed46869 826
374c5cfd 827The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
8f924df7 828range of code points of target characters. */)
374c5cfd
KH
829 (function, charset, arg, from_code, to_code)
830 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 831{
374c5cfd 832 struct charset *cs;
16fed1fc 833 unsigned from, to;
4ed46869 834
374c5cfd
KH
835 CHECK_CHARSET_GET_CHARSET (charset, cs);
836 if (NILP (from_code))
16fed1fc 837 from = CHARSET_MIN_CODE (cs);
970b7474 838 else
4ed46869 839 {
970b7474
KH
840 CHECK_NATNUM (from_code);
841 from = XINT (from_code);
842 if (from < CHARSET_MIN_CODE (cs))
843 from = CHARSET_MIN_CODE (cs);
4ed46869 844 }
374c5cfd 845 if (NILP (to_code))
970b7474 846 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
847 else
848 {
970b7474
KH
849 CHECK_NATNUM (to_code);
850 to = XINT (to_code);
851 if (to > CHARSET_MAX_CODE (cs))
852 to = CHARSET_MAX_CODE (cs);
4ed46869 853 }
16fed1fc 854 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 855 return Qnil;
35e623fb 856}
4ed46869 857
4ed46869 858
3263d5a2
KH
859/* Define a charset according to the arguments. The Nth argument is
860 the Nth attribute of the charset (the last attribute `charset-id'
861 is not included). See the docstring of `define-charset' for the
862 detail. */
4ed46869 863
3263d5a2
KH
864DEFUN ("define-charset-internal", Fdefine_charset_internal,
865 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
866 doc: /* For internal use only.
867usage: (define-charset-internal ...) */)
3263d5a2
KH
868 (nargs, args)
869 int nargs;
870 Lisp_Object *args;
4ed46869 871{
3263d5a2
KH
872 /* Charset attr vector. */
873 Lisp_Object attrs;
874 Lisp_Object val;
875 unsigned hash_code;
876 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 877 int i, j;
3263d5a2
KH
878 struct charset charset;
879 int id;
880 int dimension;
881 int new_definition_p;
882 int nchars;
883
884 if (nargs != charset_arg_max)
885 return Fsignal (Qwrong_number_of_arguments,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs)));
888
889 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
890
891 CHECK_SYMBOL (args[charset_arg_name]);
892 ASET (attrs, charset_name, args[charset_arg_name]);
893
894 val = args[charset_arg_code_space];
895 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 896 {
3263d5a2
KH
897 int min_byte, max_byte;
898
899 min_byte = XINT (Faref (val, make_number (i * 2)));
900 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
901 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
902 error ("Invalid :code-space value");
903 charset.code_space[i * 4] = min_byte;
904 charset.code_space[i * 4 + 1] = max_byte;
905 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
906 nchars *= charset.code_space[i * 4 + 2];
907 charset.code_space[i * 4 + 3] = nchars;
908 if (max_byte > 0)
909 dimension = i + 1;
910 }
4ed46869 911
3263d5a2
KH
912 val = args[charset_arg_dimension];
913 if (NILP (val))
914 charset.dimension = dimension;
915 else
4ed46869 916 {
3263d5a2
KH
917 CHECK_NATNUM (val);
918 charset.dimension = XINT (val);
919 if (charset.dimension < 1 || charset.dimension > 4)
920 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
921 }
922
3263d5a2
KH
923 charset.code_linear_p
924 = (charset.dimension == 1
925 || (charset.code_space[2] == 256
926 && (charset.dimension == 2
927 || (charset.code_space[6] == 256
928 && (charset.dimension == 3
929 || charset.code_space[10] == 256)))));
930
69f8de5b 931 if (! charset.code_linear_p)
4ed46869 932 {
69f8de5b 933 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 934 bzero (charset.code_space_mask, 256);
69f8de5b
KH
935 for (i = 0; i < 4; i++)
936 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
937 j++)
938 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
939 }
940
3263d5a2 941 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 942
3263d5a2
KH
943 charset.min_code = (charset.code_space[0]
944 | (charset.code_space[4] << 8)
945 | (charset.code_space[8] << 16)
946 | (charset.code_space[12] << 24));
947 charset.max_code = (charset.code_space[1]
948 | (charset.code_space[5] << 8)
949 | (charset.code_space[9] << 16)
950 | (charset.code_space[13] << 24));
820ee249 951 charset.char_index_offset = 0;
8a73a704 952
820ee249
KH
953 val = args[charset_arg_min_code];
954 if (! NILP (val))
955 {
956 unsigned code;
fdb82f93 957
820ee249
KH
958 if (INTEGERP (val))
959 code = XINT (val);
960 else
961 {
962 CHECK_CONS (val);
8f924df7
KH
963 CHECK_NUMBER_CAR (val);
964 CHECK_NUMBER_CDR (val);
820ee249
KH
965 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
966 }
967 if (code < charset.min_code
968 || code > charset.max_code)
969 args_out_of_range_3 (make_number (charset.min_code),
970 make_number (charset.max_code), val);
971 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
972 charset.min_code = code;
973 }
3fac5a51 974
820ee249
KH
975 val = args[charset_arg_max_code];
976 if (! NILP (val))
3fac5a51 977 {
820ee249
KH
978 unsigned code;
979
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
983 {
984 CHECK_CONS (val);
8f924df7
KH
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
820ee249
KH
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
988 }
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.max_code = code;
3fac5a51 994 }
3fac5a51 995
ecca2aad 996 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 997
3263d5a2
KH
998 val = args[charset_arg_invalid_code];
999 if (NILP (val))
1000 {
1001 if (charset.min_code > 0)
1002 charset.invalid_code = 0;
bbf12bb3
KH
1003 else
1004 {
3263d5a2
KH
1005 XSETINT (val, charset.max_code + 1);
1006 if (XINT (val) == charset.max_code + 1)
1007 charset.invalid_code = charset.max_code + 1;
1008 else
1009 error ("Attribute :invalid-code must be specified");
76d7b829 1010 }
76d7b829 1011 }
3263d5a2
KH
1012 else
1013 {
1014 CHECK_NATNUM (val);
1015 charset.invalid_code = XFASTINT (val);
1016 }
4ed46869 1017
3263d5a2
KH
1018 val = args[charset_arg_iso_final];
1019 if (NILP (val))
1020 charset.iso_final = -1;
1021 else
1022 {
1023 CHECK_NUMBER (val);
1024 if (XINT (val) < '0' || XINT (val) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val));
1026 charset.iso_final = XINT (val);
1027 }
4ed46869 1028
3263d5a2
KH
1029 val = args[charset_arg_iso_revision];
1030 if (NILP (val))
1031 charset.iso_revision = -1;
1032 else
4ed46869 1033 {
3263d5a2
KH
1034 CHECK_NUMBER (val);
1035 if (XINT (val) > 63)
1036 args_out_of_range (make_number (63), val);
1037 charset.iso_revision = XINT (val);
4ed46869 1038 }
4ed46869 1039
3263d5a2
KH
1040 val = args[charset_arg_emacs_mule_id];
1041 if (NILP (val))
1042 charset.emacs_mule_id = -1;
4ed46869
KH
1043 else
1044 {
3263d5a2
KH
1045 CHECK_NATNUM (val);
1046 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val));
1048 charset.emacs_mule_id = XINT (val);
c83ef371 1049 }
f6302ac9 1050
3263d5a2 1051 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1052
3263d5a2 1053 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1054
3263d5a2
KH
1055 charset.unified_p = 0;
1056
1057 bzero (charset.fast_map, sizeof (charset.fast_map));
1058
1059 if (! NILP (args[charset_arg_code_offset]))
1060 {
1061 val = args[charset_arg_code_offset];
1062 CHECK_NUMBER (val);
1063
1064 charset.method = CHARSET_METHOD_OFFSET;
1065 charset.code_offset = XINT (val);
1066
1067 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1068 charset.min_char = i + charset.code_offset;
1069 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1070 charset.max_char = i + charset.code_offset;
1071 if (charset.max_char > MAX_CHAR)
1072 error ("Unsupported max char: %d", charset.max_char);
1073
f148205f
KH
1074 i = (charset.min_char >> 7) << 7;
1075 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1076 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1077 i = (i >> 12) << 12;
3263d5a2
KH
1078 for (; i <= charset.max_char; i += 0x1000)
1079 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1080 }
1081 else if (! NILP (args[charset_arg_map]))
1082 {
1083 val = args[charset_arg_map];
1084 ASET (attrs, charset_map, val);
ecca2aad 1085 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1086 }
374c5cfd 1087 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1088 {
374c5cfd
KH
1089 Lisp_Object parent;
1090 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1091 struct charset *parent_charset;
1092
1093 val = args[charset_arg_subset];
1094 parent = Fcar (val);
1095 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1096 parent_min_code = Fnth (make_number (1), val);
1097 CHECK_NATNUM (parent_min_code);
1098 parent_max_code = Fnth (make_number (2), val);
1099 CHECK_NATNUM (parent_max_code);
1100 parent_code_offset = Fnth (make_number (3), val);
1101 CHECK_NUMBER (parent_code_offset);
1102 val = Fmake_vector (make_number (4), Qnil);
1103 ASET (val, 0, make_number (parent_charset->id));
1104 ASET (val, 1, parent_min_code);
1105 ASET (val, 2, parent_max_code);
1106 ASET (val, 3, parent_code_offset);
1107 ASET (attrs, charset_subset, val);
1108
1109 charset.method = CHARSET_METHOD_SUBSET;
1110 /* Here, we just copy the parent's fast_map. It's not accurate,
1111 but at least it works for quickly detecting which character
1112 DOESN'T belong to this charset. */
1113 for (i = 0; i < 190; i++)
1114 charset.fast_map[i] = parent_charset->fast_map[i];
1115
1116 /* We also copy these for parents. */
1117 charset.min_char = parent_charset->min_char;
1118 charset.max_char = parent_charset->max_char;
1119 }
1120 else if (! NILP (args[charset_arg_superset]))
0282eb69 1121 {
374c5cfd
KH
1122 val = args[charset_arg_superset];
1123 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1124 val = Fcopy_sequence (val);
374c5cfd 1125 ASET (attrs, charset_superset, val);
3263d5a2
KH
1126
1127 charset.min_char = MAX_CHAR;
1128 charset.max_char = 0;
1129 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1130 {
3263d5a2
KH
1131 Lisp_Object elt, car_part, cdr_part;
1132 int this_id, offset;
1133 struct charset *this_charset;
2e344af3 1134
3263d5a2
KH
1135 elt = Fcar (val);
1136 if (CONSP (elt))
2e344af3 1137 {
3263d5a2
KH
1138 car_part = XCAR (elt);
1139 cdr_part = XCDR (elt);
1140 CHECK_CHARSET_GET_ID (car_part, this_id);
1141 CHECK_NUMBER (cdr_part);
1142 offset = XINT (cdr_part);
177c0ea7 1143 }
3263d5a2 1144 else
4ed46869 1145 {
3263d5a2
KH
1146 CHECK_CHARSET_GET_ID (elt, this_id);
1147 offset = 0;
4ed46869 1148 }
3263d5a2
KH
1149 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1150
1151 this_charset = CHARSET_FROM_ID (this_id);
1152 if (charset.min_char > this_charset->min_char)
1153 charset.min_char = this_charset->min_char;
1154 if (charset.max_char < this_charset->max_char)
1155 charset.max_char = this_charset->max_char;
1156 for (i = 0; i < 190; i++)
1157 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1158 }
0282eb69 1159 }
2e344af3 1160 else
3263d5a2 1161 error ("None of :code-offset, :map, :parents are specified");
05505664 1162
3263d5a2
KH
1163 val = args[charset_arg_unify_map];
1164 if (! NILP (val) && !STRINGP (val))
1165 CHECK_VECTOR (val);
1166 ASET (attrs, charset_unify_map, val);
05505664 1167
3263d5a2
KH
1168 CHECK_LIST (args[charset_arg_plist]);
1169 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1170
3263d5a2
KH
1171 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1172 &hash_code);
1173 if (charset.hash_index >= 0)
1174 {
1175 new_definition_p = 0;
4f65af01 1176 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1177 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1178 }
1a45ff10 1179 else
3263d5a2
KH
1180 {
1181 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1182 hash_code);
1183 if (charset_table_used == charset_table_size)
1184 {
2fe1edd1
KH
1185 struct charset *new_table
1186 = (struct charset *) xmalloc (sizeof (struct charset)
1187 * (charset_table_size + 16));
1188 bcopy (charset_table, new_table,
1189 sizeof (struct charset) * charset_table_size);
1190 charset_table_size += 16;
1191 charset_table = new_table;
3263d5a2
KH
1192 }
1193 id = charset_table_used++;
3263d5a2
KH
1194 new_definition_p = 1;
1195 }
2e344af3 1196
4f65af01 1197 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1198 charset.id = id;
1199 charset_table[id] = charset;
2e344af3 1200
ecca2aad 1201 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1202 {
1203 load_charset (&charset, 0);
1204 charset_table[id] = charset;
1205 }
ecca2aad 1206
3263d5a2 1207 if (charset.iso_final >= 0)
4ed46869 1208 {
3263d5a2
KH
1209 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1210 charset.iso_final) = id;
1211 if (new_definition_p)
1212 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1213 Fcons (make_number (id), Qnil));
7c7dceee
KH
1214 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1215 charset_jisx0201_roman = id;
1216 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1217 charset_jisx0208_1978 = id;
1218 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1219 charset_jisx0208 = id;
4ed46869 1220 }
d0cf2d48 1221
3263d5a2 1222 if (charset.emacs_mule_id >= 0)
4ed46869 1223 {
3263d5a2 1224 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1225 if (charset.emacs_mule_id < 0xA0)
1226 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1227 else
1228 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1229 if (new_definition_p)
1230 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1231 Fcons (make_number (id), Qnil));
4ed46869
KH
1232 }
1233
3263d5a2
KH
1234 if (new_definition_p)
1235 {
1236 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1237 if (charset.supplementary_p)
1238 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1239 Fcons (make_number (id), Qnil));
1240 else
880820fe
KH
1241 {
1242 Lisp_Object tail;
1243
1244 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1245 {
1246 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1247
1248 if (cs->supplementary_p)
1249 break;
1250 }
1251 if (EQ (tail, Vcharset_ordered_list))
1252 Vcharset_ordered_list = Fcons (make_number (id),
1253 Vcharset_ordered_list);
1254 else if (NILP (tail))
1255 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1256 Fcons (make_number (id), Qnil));
1257 else
1258 {
1259 val = Fcons (XCAR (tail), XCDR (tail));
1260 XSETCDR (tail, val);
1261 XSETCAR (tail, make_number (id));
1262 }
1263 }
dbbb237d 1264 charset_ordered_list_tick++;
3263d5a2 1265 }
4ed46869 1266
3263d5a2 1267 return Qnil;
4ed46869
KH
1268}
1269
2fe1edd1
KH
1270
1271/* Same as Fdefine_charset_internal but arguments are more convenient
1272 to call from C (typically in syms_of_charset). This can define a
1273 charset of `offset' method only. Return the ID of the new
1274 charset. */
1275
1276static int
1277define_charset_internal (name, dimension, code_space, min_code, max_code,
1278 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1279 ascii_compatible, supplementary,
2fe1edd1
KH
1280 code_offset)
1281 Lisp_Object name;
1282 int dimension;
1283 unsigned char *code_space;
1284 unsigned min_code, max_code;
1285 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1286 int ascii_compatible, supplementary;
2fe1edd1
KH
1287 int code_offset;
1288{
1289 Lisp_Object args[charset_arg_max];
1290 Lisp_Object plist[14];
1291 Lisp_Object val;
1292 int i;
1293
1294 args[charset_arg_name] = name;
1295 args[charset_arg_dimension] = make_number (dimension);
1296 val = Fmake_vector (make_number (8), make_number (0));
1297 for (i = 0; i < 8; i++)
1298 ASET (val, i, make_number (code_space[i]));
1299 args[charset_arg_code_space] = val;
1300 args[charset_arg_min_code] = make_number (min_code);
1301 args[charset_arg_max_code] = make_number (max_code);
1302 args[charset_arg_iso_final]
1303 = (iso_final < 0 ? Qnil : make_number (iso_final));
1304 args[charset_arg_iso_revision] = make_number (iso_revision);
1305 args[charset_arg_emacs_mule_id]
1306 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1307 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1308 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1309 args[charset_arg_invalid_code] = Qnil;
1310 args[charset_arg_code_offset] = make_number (code_offset);
1311 args[charset_arg_map] = Qnil;
1312 args[charset_arg_subset] = Qnil;
1313 args[charset_arg_superset] = Qnil;
1314 args[charset_arg_unify_map] = Qnil;
1315
1316 plist[0] = intern (":name");
1317 plist[1] = args[charset_arg_name];
1318 plist[2] = intern (":dimension");
1319 plist[3] = args[charset_arg_dimension];
1320 plist[4] = intern (":code-space");
1321 plist[5] = args[charset_arg_code_space];
1322 plist[6] = intern (":iso-final-char");
1323 plist[7] = args[charset_arg_iso_final];
1324 plist[8] = intern (":emacs-mule-id");
1325 plist[9] = args[charset_arg_emacs_mule_id];
1326 plist[10] = intern (":ascii-compatible-p");
1327 plist[11] = args[charset_arg_ascii_compatible_p];
1328 plist[12] = intern (":code-offset");
1329 plist[13] = args[charset_arg_code_offset];
1330
1331 args[charset_arg_plist] = Flist (14, plist);
1332 Fdefine_charset_internal (charset_arg_max, args);
1333
1334 return XINT (CHARSET_SYMBOL_ID (name));
1335}
1336
1337
3263d5a2
KH
1338DEFUN ("define-charset-alias", Fdefine_charset_alias,
1339 Sdefine_charset_alias, 2, 2, 0,
1340 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1341 (alias, charset)
1342 Lisp_Object alias, charset;
4ed46869 1343{
3263d5a2
KH
1344 Lisp_Object attr;
1345
1346 CHECK_CHARSET_GET_ATTR (charset, attr);
1347 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1348 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1349 return Qnil;
1350}
4ed46869 1351
4ed46869 1352
3263d5a2 1353DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1354 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1355 (charset)
1356 Lisp_Object charset;
1357{
1358 Lisp_Object attrs;
1359
1360 CHECK_CHARSET_GET_ATTR (charset, attrs);
1361 return CHARSET_ATTR_PLIST (attrs);
1362}
1363
1364
1365DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1366 doc: /* Set CHARSET's property list to PLIST. */)
1367 (charset, plist)
1368 Lisp_Object charset, plist;
1369{
1370 Lisp_Object attrs;
1371
1372 CHECK_CHARSET_GET_ATTR (charset, attrs);
1373 CHARSET_ATTR_PLIST (attrs) = plist;
1374 return plist;
1375}
1376
1377
dbbb237d 1378DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1379 doc: /* Unify characters of CHARSET with Unicode.
1380This means reading the relevant file and installing the table defined
dbbb237d
KH
1381by CHARSET's `:unify-map' property.
1382
64165ae2
DL
1383Optional second arg UNIFY-MAP is a file name string or a vector. It has
1384the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1385`define-charset' (which see).
1386
1387Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1388 (charset, unify_map, deunify)
1389 Lisp_Object charset, unify_map, deunify;
8a73a704 1390{
3263d5a2
KH
1391 int id;
1392 struct charset *cs;
8f924df7 1393
3263d5a2
KH
1394 CHECK_CHARSET_GET_ID (charset, id);
1395 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1396 if (NILP (deunify)
1397 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1398 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1399 return Qnil;
dbbb237d 1400
3263d5a2 1401 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1402 if (NILP (deunify))
1403 {
ecca2aad
KH
1404 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1405 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1406 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1407 if (NILP (unify_map))
1408 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1409 else
ecca2aad
KH
1410 {
1411 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1412 signal_error ("Bad unify-map", unify_map);
1413 CHARSET_UNIFY_MAP (cs) = unify_map;
1414 }
1415 if (NILP (Vchar_unify_table))
1416 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1417 char_table_set_range (Vchar_unify_table,
1418 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1419 CHARSET_UNIFIED_P (cs) = 1;
1420 }
1421 else if (CHAR_TABLE_P (Vchar_unify_table))
1422 {
1423 int min_code = CHARSET_MIN_CODE (cs);
1424 int max_code = CHARSET_MAX_CODE (cs);
1425 int min_char = DECODE_CHAR (cs, min_code);
1426 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1427
dbbb237d
KH
1428 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1429 }
8f924df7 1430
3263d5a2 1431 return Qnil;
8a73a704
KH
1432}
1433
3fac5a51
KH
1434DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1435 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1436 doc: /*
d0cf2d48 1437Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1438DIMENSION is the number of bytes to represent a character: 1 or 2.
1439CHARS is the number of characters in a dimension: 94 or 96.
1440
1441This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1442If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1443return nil. */)
1444 (dimension, chars)
3fac5a51
KH
1445 Lisp_Object dimension, chars;
1446{
1447 int final_char;
1448
b7826503
PJ
1449 CHECK_NUMBER (dimension);
1450 CHECK_NUMBER (chars);
3263d5a2
KH
1451 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1452 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1453 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1454 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1455 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1456 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1457 break;
3fac5a51
KH
1458 return (final_char <= '?' ? make_number (final_char) : Qnil);
1459}
1460
3263d5a2
KH
1461static void
1462check_iso_charset_parameter (dimension, chars, final_char)
1463 Lisp_Object dimension, chars, final_char;
4ed46869 1464{
3263d5a2
KH
1465 CHECK_NATNUM (dimension);
1466 CHECK_NATNUM (chars);
1467 CHECK_NATNUM (final_char);
4ed46869 1468
3263d5a2
KH
1469 if (XINT (dimension) > 3)
1470 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1471 if (XINT (chars) != 94 && XINT (chars) != 96)
1472 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1473 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1474 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1475}
1476
1477
1478DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1479 4, 4, 0,
cefd8c4f
KH
1480 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1481
1482On decoding by an ISO-2022 base coding system, when a charset
1483specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1484if CHARSET is designated instead. */)
3263d5a2
KH
1485 (dimension, chars, final_char, charset)
1486 Lisp_Object dimension, chars, final_char, charset;
1487{
1488 int id;
82215ce9 1489 int chars_flag;
4ed46869 1490
3263d5a2
KH
1491 CHECK_CHARSET_GET_ID (charset, id);
1492 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1493 chars_flag = XINT (chars) == 96;
1494 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1495 return Qnil;
1496}
1497
3263d5a2 1498
2e344af3
KH
1499/* Return information about charsets in the text at PTR of NBYTES
1500 bytes, which are NCHARS characters. The value is:
f6302ac9 1501
cfe34140 1502 0: Each character is represented by one byte. This is always
3263d5a2
KH
1503 true for a unibyte string. For a multibyte string, true if
1504 it contains only ASCII characters.
1505
28c026cd
DL
1506 1: No charsets other than ascii, control-1, and latin-1 are
1507 found.
1d67c29b 1508
3263d5a2
KH
1509 2: Otherwise.
1510*/
4ed46869
KH
1511
1512int
3263d5a2
KH
1513string_xstring_p (string)
1514 Lisp_Object string;
4ed46869 1515{
8f924df7
KH
1516 const unsigned char *p = SDATA (string);
1517 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1518
8f924df7 1519 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1520 return 0;
1521
3263d5a2 1522 while (p < endp)
0282eb69 1523 {
3263d5a2 1524 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1525
3cc67a4d 1526 if (c >= 0x100)
3263d5a2 1527 return 2;
0282eb69 1528 }
3263d5a2
KH
1529 return 1;
1530}
05505664 1531
05505664 1532
3263d5a2 1533/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1534
3cc67a4d
KH
1535 CHARSETS is a vector. If Nth element is non-nil, it means the
1536 charset whose id is N is already found.
2e344af3 1537
3263d5a2 1538 It may lookup a translation table TABLE if supplied. */
2e344af3 1539
3263d5a2 1540static void
3cc67a4d 1541find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
dbbb237d 1542 const unsigned char *ptr;
42ca828e 1543 EMACS_INT nchars, nbytes;
3263d5a2 1544 Lisp_Object charsets, table;
3cc67a4d 1545 int multibyte;
3263d5a2 1546{
dbbb237d 1547 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1548
1549 if (nchars == nbytes)
3263d5a2 1550 {
3cc67a4d
KH
1551 if (multibyte)
1552 ASET (charsets, charset_ascii, Qt);
1553 else
1554 while (ptr < pend)
1555 {
1556 int c = *ptr++;
1557
1558 if (!NILP (table))
1559 c = translate_char (table, c);
1560 if (ASCII_BYTE_P (c))
1561 ASET (charsets, charset_ascii, Qt);
1562 else
1563 ASET (charsets, charset_eight_bit, Qt);
1564 }
1565 }
1566 else
1567 {
1568 while (ptr < pend)
3263d5a2 1569 {
3cc67a4d
KH
1570 int c = STRING_CHAR_ADVANCE (ptr);
1571 struct charset *charset;
3263d5a2 1572
3cc67a4d
KH
1573 if (!NILP (table))
1574 c = translate_char (table, c);
1575 charset = CHAR_CHARSET (c);
1576 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1577 }
4ed46869 1578 }
4ed46869
KH
1579}
1580
1581DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1582 2, 3, 0,
fdb82f93
PJ
1583 doc: /* Return a list of charsets in the region between BEG and END.
1584BEG and END are buffer positions.
1585Optional arg TABLE if non-nil is a translation table to look up.
1586
fdb82f93
PJ
1587If the current buffer is unibyte, the returned list may contain
1588only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1589 (beg, end, table)
23d2a7f1 1590 Lisp_Object beg, end, table;
4ed46869 1591{
3263d5a2 1592 Lisp_Object charsets;
42ca828e
DL
1593 EMACS_INT from, from_byte, to, stop, stop_byte;
1594 int i;
4ed46869 1595 Lisp_Object val;
3cc67a4d 1596 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1597
1598 validate_region (&beg, &end);
1599 from = XFASTINT (beg);
1600 stop = to = XFASTINT (end);
6ae1f27e 1601
4ed46869 1602 if (from < GPT && GPT < to)
6ae1f27e
RS
1603 {
1604 stop = GPT;
1605 stop_byte = GPT_BYTE;
1606 }
1607 else
1608 stop_byte = CHAR_TO_BYTE (stop);
1609
1610 from_byte = CHAR_TO_BYTE (from);
1611
3263d5a2 1612 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1613 while (1)
1614 {
3263d5a2 1615 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1616 stop_byte - from_byte, charsets, table,
1617 multibyte);
4ed46869 1618 if (stop < to)
6ae1f27e
RS
1619 {
1620 from = stop, from_byte = stop_byte;
1621 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1622 }
4ed46869
KH
1623 else
1624 break;
1625 }
6ae1f27e 1626
4ed46869 1627 val = Qnil;
3263d5a2 1628 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1629 if (!NILP (AREF (charsets, i)))
3263d5a2 1630 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1631 return val;
1632}
1633
1634DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1635 1, 2, 0,
fdb82f93
PJ
1636 doc: /* Return a list of charsets in STR.
1637Optional arg TABLE if non-nil is a translation table to look up.
1638
fdb82f93 1639If STR is unibyte, the returned list may contain
3263d5a2 1640only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1641 (str, table)
23d2a7f1 1642 Lisp_Object str, table;
4ed46869 1643{
3263d5a2 1644 Lisp_Object charsets;
4ed46869
KH
1645 int i;
1646 Lisp_Object val;
1647
b7826503 1648 CHECK_STRING (str);
87b089ad 1649
3263d5a2 1650 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1651 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1652 charsets, table,
1653 STRING_MULTIBYTE (str));
4ed46869 1654 val = Qnil;
3263d5a2 1655 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1656 if (!NILP (AREF (charsets, i)))
3263d5a2 1657 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1658 return val;
1659}
2e344af3 1660
4ed46869 1661\f
3263d5a2 1662
ecca2aad
KH
1663/* Return a unified character code for C (>= 0x110000). VAL is a
1664 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1665 charset symbol. */
1666int
1667maybe_unify_char (c, val)
1668 int c;
1669 Lisp_Object val;
1670{
1671 struct charset *charset;
1672
1673 if (INTEGERP (val))
1674 return XINT (val);
1675 if (NILP (val))
1676 return c;
1677
1678 CHECK_CHARSET_GET_CHARSET (val, charset);
1679 load_charset (charset, 1);
1680 if (! inhibit_load_charset_map)
1681 {
1682 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1683 if (! NILP (val))
1684 c = XINT (val);
1685 }
1686 else
1687 {
1688 int code_index = c - CHARSET_CODE_OFFSET (charset);
1689 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1690
1691 if (unified > 0)
1692 c = unified;
1693 }
1694 return c;
1695}
1696
1697
3263d5a2
KH
1698/* Return a character correponding to the code-point CODE of
1699 CHARSET. */
1700
1701int
1702decode_char (charset, code)
1703 struct charset *charset;
1704 unsigned code;
4ed46869 1705{
3263d5a2
KH
1706 int c, char_index;
1707 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1708
3263d5a2
KH
1709 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1710 return -1;
4ed46869 1711
374c5cfd 1712 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1713 {
374c5cfd
KH
1714 Lisp_Object subset_info;
1715
1716 subset_info = CHARSET_SUBSET (charset);
1717 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1718 code -= XINT (AREF (subset_info, 3));
1719 if (code < XFASTINT (AREF (subset_info, 1))
1720 || code > XFASTINT (AREF (subset_info, 2)))
1721 c = -1;
1722 else
1723 c = DECODE_CHAR (charset, code);
2e344af3 1724 }
374c5cfd 1725 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1726 {
3263d5a2 1727 Lisp_Object parents;
4ed46869 1728
374c5cfd 1729 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1730 c = -1;
1731 for (; CONSP (parents); parents = XCDR (parents))
1732 {
1733 int id = XINT (XCAR (XCAR (parents)));
1734 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1735 unsigned this_code = code - code_offset;
4ed46869 1736
3263d5a2
KH
1737 charset = CHARSET_FROM_ID (id);
1738 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1739 break;
1740 }
1741 }
1742 else
ac4137cc 1743 {
3263d5a2 1744 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1745 if (char_index < 0)
1746 return -1;
4ed46869 1747
3263d5a2 1748 if (method == CHARSET_METHOD_MAP)
ac4137cc 1749 {
3263d5a2 1750 Lisp_Object decoder;
4ed46869 1751
3263d5a2
KH
1752 decoder = CHARSET_DECODER (charset);
1753 if (! VECTORP (decoder))
ecca2aad
KH
1754 {
1755 load_charset (charset, 1);
1756 decoder = CHARSET_DECODER (charset);
1757 }
1758 if (VECTORP (decoder))
1759 c = XINT (AREF (decoder, char_index));
1760 else
1761 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1762 }
ecca2aad 1763 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1764 {
3263d5a2 1765 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1766 if (CHARSET_UNIFIED_P (charset)
1767 && c > MAX_UNICODE_CHAR)
1768 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1769 }
1770 }
4ed46869 1771
3263d5a2 1772 return c;
90d7b74e
KH
1773}
1774
374c5cfd
KH
1775/* Variable used temporarily by the macro ENCODE_CHAR. */
1776Lisp_Object charset_work;
4ed46869 1777
3263d5a2 1778/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1779 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1780 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1781
3263d5a2
KH
1782unsigned
1783encode_char (charset, c)
1784 struct charset *charset;
9b6a601f 1785 int c;
9d3d8cba 1786{
3263d5a2
KH
1787 unsigned code;
1788 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1789
3263d5a2 1790 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1791 {
374c5cfd 1792 Lisp_Object deunifier, deunified;
ecca2aad 1793 int code_index = -1;
4ed46869 1794
3263d5a2
KH
1795 deunifier = CHARSET_DEUNIFIER (charset);
1796 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1797 {
ecca2aad 1798 load_charset (charset, 2);
3263d5a2 1799 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1800 }
ecca2aad
KH
1801 if (CHAR_TABLE_P (deunifier))
1802 {
1803 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1804
1805 if (INTEGERP (deunified))
1806 code_index = XINT (deunified);
1807 }
1808 else
1809 {
1810 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1811 }
1812 if (code_index >= 0)
1813 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1814 }
9d3d8cba 1815
374c5cfd
KH
1816 if (method == CHARSET_METHOD_SUBSET)
1817 {
1818 Lisp_Object subset_info;
1819 struct charset *this_charset;
1820
1821 subset_info = CHARSET_SUBSET (charset);
1822 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1823 code = ENCODE_CHAR (this_charset, c);
1824 if (code == CHARSET_INVALID_CODE (this_charset)
1825 || code < XFASTINT (AREF (subset_info, 1))
1826 || code > XFASTINT (AREF (subset_info, 2)))
1827 return CHARSET_INVALID_CODE (charset);
1828 code += XINT (AREF (subset_info, 3));
1829 return code;
1830 }
9d3d8cba 1831
374c5cfd 1832 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1833 {
3263d5a2 1834 Lisp_Object parents;
d2665018 1835
374c5cfd 1836 parents = CHARSET_SUPERSET (charset);
3263d5a2 1837 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1838 {
3263d5a2
KH
1839 int id = XINT (XCAR (XCAR (parents)));
1840 int code_offset = XINT (XCDR (XCAR (parents)));
1841 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1842
3263d5a2 1843 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1844 if (code != CHARSET_INVALID_CODE (this_charset))
1845 return code + code_offset;
beeedaad 1846 }
3263d5a2
KH
1847 return CHARSET_INVALID_CODE (charset);
1848 }
1bcc1567 1849
15c85a88
KH
1850 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1851 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1852 return CHARSET_INVALID_CODE (charset);
1bcc1567 1853
3263d5a2 1854 if (method == CHARSET_METHOD_MAP)
3f62427c 1855 {
3263d5a2 1856 Lisp_Object encoder;
beeedaad 1857 Lisp_Object val;
9b6a601f 1858
3263d5a2
KH
1859 encoder = CHARSET_ENCODER (charset);
1860 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1861 {
1862 load_charset (charset, 2);
1863 encoder = CHARSET_ENCODER (charset);
1864 }
1865 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1866 {
1867 val = CHAR_TABLE_REF (encoder, c);
1868 if (NILP (val))
1869 return CHARSET_INVALID_CODE (charset);
1870 code = XINT (val);
1871 if (! CHARSET_COMPACT_CODES_P (charset))
1872 code = INDEX_TO_CODE_POINT (charset, code);
1873 }
1874 else
1875 {
1876 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1877 code = INDEX_TO_CODE_POINT (charset, code);
1878 }
3263d5a2 1879 }
820ee249 1880 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1881 {
ecca2aad
KH
1882 int code_index = c - CHARSET_CODE_OFFSET (charset);
1883
1884 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1885 }
8ac5a9cc 1886
3263d5a2 1887 return code;
4ed46869
KH
1888}
1889
4ed46869 1890
3263d5a2
KH
1891DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1892 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1893Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1894
3263d5a2 1895CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1896
3263d5a2 1897Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1898and CODE-POINT to a character. Currently not supported and just ignored. */)
3263d5a2
KH
1899 (charset, code_point, restriction)
1900 Lisp_Object charset, code_point, restriction;
4ed46869 1901{
3263d5a2
KH
1902 int c, id;
1903 unsigned code;
1904 struct charset *charsetp;
859f2b3c 1905
3263d5a2
KH
1906 CHECK_CHARSET_GET_ID (charset, id);
1907 if (CONSP (code_point))
1908 {
8f924df7
KH
1909 CHECK_NATNUM_CAR (code_point);
1910 CHECK_NATNUM_CDR (code_point);
69f8de5b 1911 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1912 }
4ed46869
KH
1913 else
1914 {
3263d5a2
KH
1915 CHECK_NATNUM (code_point);
1916 code = XINT (code_point);
4ed46869 1917 }
3263d5a2
KH
1918 charsetp = CHARSET_FROM_ID (id);
1919 c = DECODE_CHAR (charsetp, code);
1920 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1921}
1922
859f2b3c 1923
3263d5a2
KH
1924DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1925 doc: /* Encode the character CH into a code-point of CHARSET.
1926Return nil if CHARSET doesn't include CH.
17e7ef1b 1927
d0cf2d48 1928Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2
KH
1929code-point in CCS. Currently not supported and just ignored. */)
1930 (ch, charset, restriction)
1931 Lisp_Object ch, charset, restriction;
4ed46869 1932{
16fed1fc 1933 int id;
3263d5a2
KH
1934 unsigned code;
1935 struct charset *charsetp;
046b1f03 1936
3263d5a2
KH
1937 CHECK_CHARSET_GET_ID (charset, id);
1938 CHECK_NATNUM (ch);
3263d5a2 1939 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1940 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1941 if (code == CHARSET_INVALID_CODE (charsetp))
1942 return Qnil;
1943 if (code > 0x7FFFFFF)
1944 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1945 return make_number (code);
beeedaad
KH
1946}
1947
beeedaad 1948
b121a744
KH
1949DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1950 doc:
1951 /* Return a character of CHARSET whose position codes are CODEn.
1952
1953CODE1 through CODE4 are optional, but if you don't supply sufficient
1954position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1955is specified. */)
b121a744
KH
1956 (charset, code1, code2, code3, code4)
1957 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1958{
3263d5a2
KH
1959 int id, dimension;
1960 struct charset *charsetp;
b121a744
KH
1961 unsigned code;
1962 int c;
87b089ad 1963
3263d5a2
KH
1964 CHECK_CHARSET_GET_ID (charset, id);
1965 charsetp = CHARSET_FROM_ID (id);
4ed46869 1966
b121a744
KH
1967 dimension = CHARSET_DIMENSION (charsetp);
1968 if (NILP (code1))
d47073ca
KH
1969 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1970 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1971 else
859f2b3c 1972 {
b121a744
KH
1973 CHECK_NATNUM (code1);
1974 if (XFASTINT (code1) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code1);
1976 code = XFASTINT (code1);
859f2b3c 1977
b0a1e45e 1978 if (dimension > 1)
beeedaad 1979 {
b121a744 1980 code <<= 8;
b0a1e45e
KH
1981 if (NILP (code2))
1982 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1983 else
b121a744 1984 {
b0a1e45e
KH
1985 CHECK_NATNUM (code2);
1986 if (XFASTINT (code2) >= 0x100)
1987 args_out_of_range (make_number (0xFF), code2);
1988 code |= XFASTINT (code2);
b121a744 1989 }
99529c2c 1990
b0a1e45e 1991 if (dimension > 2)
b121a744
KH
1992 {
1993 code <<= 8;
b0a1e45e
KH
1994 if (NILP (code3))
1995 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1996 else
1997 {
b0a1e45e
KH
1998 CHECK_NATNUM (code3);
1999 if (XFASTINT (code3) >= 0x100)
2000 args_out_of_range (make_number (0xFF), code3);
2001 code |= XFASTINT (code3);
2002 }
2003
2004 if (dimension > 3)
2005 {
2006 code <<= 8;
2007 if (NILP (code4))
2008 code |= charsetp->code_space[0];
2009 else
2010 {
2011 CHECK_NATNUM (code4);
2012 if (XFASTINT (code4) >= 0x100)
2013 args_out_of_range (make_number (0xFF), code4);
2014 code |= XFASTINT (code4);
2015 }
b121a744
KH
2016 }
2017 }
beeedaad 2018 }
859f2b3c 2019 }
beeedaad 2020
b121a744
KH
2021 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2022 code &= 0x7F7F7F7F;
2023 c = DECODE_CHAR (charsetp, code);
2024 if (c < 0)
2025 error ("Invalid code(s)");
3263d5a2 2026 return make_number (c);
4ed46869
KH
2027}
2028
beeedaad 2029
3263d5a2
KH
2030/* Return the first charset in CHARSET_LIST that contains C.
2031 CHARSET_LIST is a list of charset IDs. If it is nil, use
2032 Vcharset_ordered_list. */
beeedaad 2033
3263d5a2
KH
2034struct charset *
2035char_charset (c, charset_list, code_return)
2036 int c;
2037 Lisp_Object charset_list;
2038 unsigned *code_return;
2e344af3 2039{
8a1816bb
KH
2040 int maybe_null = 0;
2041
3263d5a2
KH
2042 if (NILP (charset_list))
2043 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2044 else
2045 maybe_null = 1;
beeedaad 2046
6c652beb 2047 while (CONSP (charset_list))
2e344af3 2048 {
3263d5a2
KH
2049 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2050 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2051
3263d5a2 2052 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2053 {
3263d5a2
KH
2054 if (code_return)
2055 *code_return = code;
2056 return charset;
3f62427c 2057 }
3263d5a2 2058 charset_list = XCDR (charset_list);
6c652beb
KH
2059 if (c <= MAX_UNICODE_CHAR
2060 && EQ (charset_list, Vcharset_non_preferred_head))
2061 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2062 }
8a1816bb
KH
2063 return (maybe_null ? NULL
2064 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2065 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2066}
2067
2e344af3 2068
3263d5a2 2069DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2070 doc:
d0cf2d48 2071 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2072The charset is decided by the current priority order of charsets.
2073A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2074CH in the charset. */)
3263d5a2
KH
2075 (ch)
2076 Lisp_Object ch;
4ed46869 2077{
3263d5a2
KH
2078 struct charset *charset;
2079 int c, dimension;
2080 unsigned code;
4ed46869
KH
2081 Lisp_Object val;
2082
3263d5a2
KH
2083 CHECK_CHARACTER (ch);
2084 c = XFASTINT (ch);
2085 charset = CHAR_CHARSET (c);
2086 if (! charset)
3cc67a4d 2087 abort ();
3263d5a2
KH
2088 code = ENCODE_CHAR (charset, c);
2089 if (code == CHARSET_INVALID_CODE (charset))
2090 abort ();
2091 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2092 for (val = Qnil; dimension > 0; dimension--)
2093 {
2094 val = Fcons (make_number (code & 0xFF), val);
2095 code >>= 8;
2096 }
3263d5a2 2097 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2098}
2099
740f080d 2100
3263d5a2 2101DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
327719ee 2102 doc: /* Return the charset of highest priority that contains CH. */)
fdb82f93 2103 (ch)
4ed46869
KH
2104 Lisp_Object ch;
2105{
3263d5a2 2106 struct charset *charset;
4ed46869 2107
3263d5a2
KH
2108 CHECK_CHARACTER (ch);
2109 charset = CHAR_CHARSET (XINT (ch));
2110 return (CHARSET_NAME (charset));
4ed46869
KH
2111}
2112
17e7ef1b 2113
3263d5a2
KH
2114DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2115 doc: /*
2116Return charset of a character in the current buffer at position POS.
2117If POS is nil, it defauls to the current point.
2118If POS is out of range, the value is nil. */)
2119 (pos)
2120 Lisp_Object pos;
2e344af3 2121{
3263d5a2
KH
2122 Lisp_Object ch;
2123 struct charset *charset;
046b1f03 2124
3263d5a2
KH
2125 ch = Fchar_after (pos);
2126 if (! INTEGERP (ch))
2127 return ch;
2128 charset = CHAR_CHARSET (XINT (ch));
2129 return (CHARSET_NAME (charset));
6ae1f27e 2130}
9036eb45 2131
87b089ad 2132
3263d5a2
KH
2133DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2134 doc: /*
2135Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2136
2137ISO 2022's designation sequence (escape sequence) distinguishes charsets
2138by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2139whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2140See the documentation of the function `charset-info' for the meanings of
2141DIMENSION, CHARS, and FINAL-CHAR. */)
2142 (dimension, chars, final_char)
2143 Lisp_Object dimension, chars, final_char;
6ae1f27e 2144{
3263d5a2 2145 int id;
82215ce9 2146 int chars_flag;
a8a35e61 2147
3263d5a2 2148 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2149 chars_flag = XFASTINT (chars) == 96;
2150 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2151 XFASTINT (final_char));
2152 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2153}
2154
87b089ad 2155
3263d5a2
KH
2156DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2157 0, 0, 0,
2158 doc: /*
ecca2aad
KH
2159Internal use only.
2160Clear temporary charset mapping tables.
2161It should be called only from temacs invoked for dumping. */)
3263d5a2 2162 ()
87b089ad 2163{
53316e55 2164 int i;
3263d5a2
KH
2165 struct charset *charset;
2166 Lisp_Object attrs;
87b089ad 2167
ecca2aad 2168 if (temp_charset_work)
87b089ad 2169 {
ecca2aad
KH
2170 free (temp_charset_work);
2171 temp_charset_work = NULL;
2e344af3 2172 }
2e344af3 2173
ecca2aad
KH
2174 if (CHAR_TABLE_P (Vchar_unify_table))
2175 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2176
3263d5a2 2177 return Qnil;
740f080d
KH
2178}
2179
8ddf5e57
DL
2180DEFUN ("charset-priority-list", Fcharset_priority_list,
2181 Scharset_priority_list, 0, 1, 0,
2182 doc: /* Return the list of charsets ordered by priority.
2183HIGHESTP non-nil means just return the highest priority one. */)
2184 (highestp)
2185 Lisp_Object highestp;
2e344af3 2186{
8ddf5e57 2187 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2188
8ddf5e57 2189 if (!NILP (highestp))
16fed1fc 2190 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2191
8ddf5e57 2192 while (!NILP (list))
2e344af3 2193 {
16fed1fc 2194 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2195 list = XCDR (list);
2e344af3 2196 }
8ddf5e57 2197 return Fnreverse (val);
2e344af3
KH
2198}
2199
8ddf5e57
DL
2200DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2201 1, MANY, 0,
2202 doc: /* Assign higher priority to the charsets given as arguments.
2203usage: (set-charset-priority &rest charsets) */)
2204 (nargs, args)
2205 int nargs;
4ed46869
KH
2206 Lisp_Object *args;
2207{
af7c60ca 2208 Lisp_Object new_head, old_list, arglist[2];
321c819c 2209 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2210 int i, id;
4ed46869 2211
8ddf5e57 2212 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2213 new_head = Qnil;
8ddf5e57 2214 for (i = 0; i < nargs; i++)
4ed46869 2215 {
8ddf5e57 2216 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2217 if (! NILP (Fmemq (make_number (id), old_list)))
2218 {
2219 old_list = Fdelq (make_number (id), old_list);
2220 new_head = Fcons (make_number (id), new_head);
2221 }
5729c92f 2222 }
8ddf5e57 2223 arglist[0] = Fnreverse (new_head);
6a9c90ec 2224 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2225 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2226 charset_ordered_list_tick++;
5729c92f 2227
321c819c 2228 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2229 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2230 {
e77415b0 2231 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2232 list_2022 = Fcons (XCAR (old_list), list_2022);
2233 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2234 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
4ed46869 2235 }
321c819c
KH
2236 Viso_2022_charset_list = Fnreverse (list_2022);
2237 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
4ed46869 2238
8ddf5e57 2239 return Qnil;
4ed46869
KH
2240}
2241
d5b33309
KH
2242DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2243 0, 1, 0,
2244 doc: /* Internal use only.
2245Return charset identification number of CHARSET. */)
2246 (charset)
2247 Lisp_Object charset;
4ed46869 2248{
d5b33309 2249 int id;
4ed46869 2250
d5b33309
KH
2251 CHECK_CHARSET_GET_ID (charset, id);
2252 return make_number (id);
4ed46869
KH
2253}
2254
4ed46869 2255\f
3263d5a2
KH
2256void
2257init_charset ()
4ed46869 2258{
c8f94403
GM
2259 Lisp_Object tempdir;
2260 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2261 if (access (SDATA (tempdir), 0) < 0)
2262 {
2263 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2264Emacs will not function correctly without the character map files.\n\
2265Please check your installation!\n",
2266 tempdir);
2267 /* TODO should this be a fatal error? (Bug#909) */
2268 }
2269
2270 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2271}
2272
4ed46869 2273
dfcf069d 2274void
4ed46869
KH
2275init_charset_once ()
2276{
2277 int i, j, k;
2278
3263d5a2
KH
2279 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2280 for (j = 0; j < ISO_MAX_CHARS; j++)
2281 for (k = 0; k < ISO_MAX_FINAL; k++)
2282 iso_charset_table[i][j][k] = -1;
4ed46869 2283
60383934 2284 for (i = 0; i < 256; i++)
3263d5a2 2285 emacs_mule_charset[i] = NULL;
4ed46869 2286
7c7dceee
KH
2287 charset_jisx0201_roman = -1;
2288 charset_jisx0208_1978 = -1;
2289 charset_jisx0208 = -1;
4ed46869
KH
2290
2291 for (i = 0; i < 128; i++)
14e3d523 2292 unibyte_to_multibyte_table[i] = i;
4ed46869 2293 for (; i < 256; i++)
170e4589 2294 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
4ed46869
KH
2295}
2296
2297#ifdef emacs
2298
dfcf069d 2299void
4ed46869
KH
2300syms_of_charset ()
2301{
3263d5a2
KH
2302 DEFSYM (Qcharsetp, "charsetp");
2303
2304 DEFSYM (Qascii, "ascii");
2305 DEFSYM (Qunicode, "unicode");
6c652beb 2306 DEFSYM (Qemacs, "emacs");
2fe1edd1 2307 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2308 DEFSYM (Qiso_8859_1, "iso-8859-1");
2309
2310 DEFSYM (Qgl, "gl");
2311 DEFSYM (Qgr, "gr");
2312
3263d5a2
KH
2313 staticpro (&Vcharset_ordered_list);
2314 Vcharset_ordered_list = Qnil;
2315
2316 staticpro (&Viso_2022_charset_list);
2317 Viso_2022_charset_list = Qnil;
2318
2319 staticpro (&Vemacs_mule_charset_list);
2320 Vemacs_mule_charset_list = Qnil;
2321
3943ed76
KH
2322 /* Don't staticpro them here. It's done in syms_of_fns. */
2323 QCtest = intern (":test");
2324 Qeq = intern ("eq");
2325
3263d5a2 2326 staticpro (&Vcharset_hash_table);
8f924df7
KH
2327 {
2328 Lisp_Object args[2];
2329 args[0] = QCtest;
2330 args[1] = Qeq;
2331 Vcharset_hash_table = Fmake_hash_table (2, args);
2332 }
3263d5a2
KH
2333
2334 charset_table_size = 128;
2335 charset_table = ((struct charset *)
2336 xmalloc (sizeof (struct charset) * charset_table_size));
2337 charset_table_used = 0;
2338
3263d5a2
KH
2339 defsubr (&Scharsetp);
2340 defsubr (&Smap_charset_chars);
2341 defsubr (&Sdefine_charset_internal);
2342 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2343 defsubr (&Scharset_plist);
2344 defsubr (&Sset_charset_plist);
2345 defsubr (&Sunify_charset);
3fac5a51 2346 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2347 defsubr (&Sdeclare_equiv_charset);
2348 defsubr (&Sfind_charset_region);
2349 defsubr (&Sfind_charset_string);
3263d5a2
KH
2350 defsubr (&Sdecode_char);
2351 defsubr (&Sencode_char);
4ed46869 2352 defsubr (&Ssplit_char);
3263d5a2 2353 defsubr (&Smake_char);
4ed46869 2354 defsubr (&Schar_charset);
90d7b74e 2355 defsubr (&Scharset_after);
4ed46869 2356 defsubr (&Siso_charset);
3263d5a2 2357 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2358 defsubr (&Scharset_priority_list);
2359 defsubr (&Sset_charset_priority);
d5b33309 2360 defsubr (&Scharset_id_internal);
3263d5a2 2361
4beef065 2362 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2363 doc: /* *List of directories to search for charset map files. */);
4beef065 2364 Vcharset_map_path = Qnil;
4ed46869 2365
ecca2aad
KH
2366 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2367 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2368 inhibit_load_charset_map = 0;
2369
4ed46869 2370 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2371 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2372 Vcharset_list = Qnil;
2373
6a9c90ec
KH
2374 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2375 doc: /* ISO639 language mnemonic symbol for the current language environment.
2376If the current language environment is for multiple languages (e.g. "Latin-1"),
2377the value may be a list of mnemonics. */);
2378 Vcurrent_iso639_language = Qnil;
2379
2fe1edd1
KH
2380 charset_ascii
2381 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2382 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2383 charset_iso_8859_1
2384 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2385 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2386 charset_unicode
73fbf2d9 2387 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2388 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2389 charset_emacs
2390 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2391 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2392 charset_eight_bit
2393 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2394 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2395 MAX_5_BYTE_CHAR + 1);
4ed46869
KH
2396}
2397
2398#endif /* emacs */
cefd8c4f
KH
2399
2400/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2401 (do not change this comment) */