* emacs-lisp/re-builder.el (reb-auto-update): Remove redundant code.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403
GM
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
8cabe764 5 2005, 2006, 2007, 2008
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
4ed46869 34#include "lisp.h"
3263d5a2 35#include "character.h"
4ed46869
KH
36#include "charset.h"
37#include "coding.h"
fc6b09bf 38#include "disptab.h"
3263d5a2 39#include "buffer.h"
4ed46869 40
04c2f2c5 41/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 42
3263d5a2 43 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 44 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
4ed46869 48
3263d5a2
KH
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
04c2f2c5 52 charset_table as a struct charset.
4ed46869 53
3263d5a2 54*/
4ed46869 55
3263d5a2
KH
56/* List of all charsets. This variable is used only from Emacs
57 Lisp. */
4ed46869 58Lisp_Object Vcharset_list;
4ed46869 59
3263d5a2
KH
60/* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62Lisp_Object Vcharset_hash_table;
4ed46869 63
3263d5a2
KH
64/* Table of struct charset. */
65struct charset *charset_table;
4ed46869 66
3263d5a2 67static int charset_table_size;
5af5dd92 68static int charset_table_used;
4ed46869 69
3263d5a2 70Lisp_Object Qcharsetp;
4ed46869 71
3263d5a2
KH
72/* Special charset symbols. */
73Lisp_Object Qascii;
2fe1edd1 74Lisp_Object Qeight_bit;
3263d5a2
KH
75Lisp_Object Qiso_8859_1;
76Lisp_Object Qunicode;
6c652beb 77Lisp_Object Qemacs;
4ed46869 78
3263d5a2
KH
79/* The corresponding charsets. */
80int charset_ascii;
2fe1edd1 81int charset_eight_bit;
3263d5a2
KH
82int charset_iso_8859_1;
83int charset_unicode;
6c652beb 84int charset_emacs;
b0e3cf2b 85
7c7dceee
KH
86/* The other special charsets. */
87int charset_jisx0201_roman;
88int charset_jisx0208_1978;
89int charset_jisx0208;
c1a08b4c 90
3263d5a2
KH
91/* Value of charset attribute `charset-iso-plane'. */
92Lisp_Object Qgl, Qgr;
c1a08b4c 93
d1a04588
KH
94/* Charset of unibyte characters. */
95int charset_unibyte;
4ed46869 96
3263d5a2
KH
97/* List of charsets ordered by the priority. */
98Lisp_Object Vcharset_ordered_list;
4ed46869 99
6a9c90ec
KH
100/* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102Lisp_Object Vcharset_non_preferred_head;
103
dbbb237d 104/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 105 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
106 -1. */
107unsigned short charset_ordered_list_tick;
4ed46869 108
3263d5a2
KH
109/* List of iso-2022 charsets. */
110Lisp_Object Viso_2022_charset_list;
35e623fb 111
3263d5a2
KH
112/* List of emacs-mule charsets. */
113Lisp_Object Vemacs_mule_charset_list;
114
115struct charset *emacs_mule_charset[256];
4ed46869
KH
116
117/* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
119int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
4beef065 121Lisp_Object Vcharset_map_path;
3263d5a2 122
ecca2aad
KH
123/* If nonzero, don't load charset maps. */
124int inhibit_load_charset_map;
3263d5a2 125
6a9c90ec
KH
126Lisp_Object Vcurrent_iso639_language;
127
64165ae2
DL
128/* Defined in chartab.c */
129extern void
130map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
69f8de5b
KH
135#define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
820ee249
KH
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
3263d5a2
KH
150 : -1)
151
152
153/* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
820ee249
KH
156#define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
4cf9710d 169
ecca2aad
KH
170/* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
8a73a704 172
ecca2aad
KH
173static struct
174{
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203} *temp_charset_work;
204
205#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
046b1f03 227\f
93bcb785 228
e9ce014c
KH
229/* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
3263d5a2 231int charset_map_loaded;
4ed46869 232
e9ce014c 233struct charset_map_entries
4ed46869 234{
e9ce014c
KH
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240};
241
ecca2aad
KH
242/* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
4cf9710d 251
ecca2aad
KH
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 257
ecca2aad 258 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 259
ecca2aad
KH
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265*/
4ed46869 266
3263d5a2 267static void
e9ce014c 268load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 269 struct charset *charset;
e9ce014c
KH
270 struct charset_map_entries *entries;
271 int n_entries;
3263d5a2 272 int control_flag;
4ed46869 273{
3263d5a2 274 Lisp_Object vec, table;
3263d5a2
KH
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
3263d5a2 278 int i;
3263d5a2 279 unsigned char *fast_map = charset->fast_map;
99529c2c 280
e9ce014c
KH
281 if (n_entries <= 0)
282 return;
283
ecca2aad 284 if (control_flag)
8ac5a9cc 285 {
ecca2aad
KH
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 293
ecca2aad
KH
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
3263d5a2 333 charset_map_loaded = 1;
2e344af3 334 }
6662e69b 335
e9ce014c 336 min_char = max_char = entries->entry[0].c;
3263d5a2 337 nonascii_min_char = MAX_CHAR;
e9ce014c 338 for (i = 0; i < n_entries; i++)
2e344af3 339 {
e9ce014c 340 unsigned from, to;
3b4f4446
KH
341 int from_index, to_index;
342 int from_c, to_c;
e9ce014c 343 int idx = i % 0x10000;
3263d5a2 344
e9ce014c
KH
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
3b4f4446
KH
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
6662e69b 352 {
3b4f4446
KH
353 to_index = from_index;
354 to_c = from_c;
6662e69b 355 }
3b4f4446 356 else
6662e69b 357 {
3b4f4446
KH
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
6662e69b 360 }
3b4f4446
KH
361 if (from_index < 0 || to_index < 0)
362 continue;
3263d5a2 363
ecca2aad
KH
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
6662e69b 368
ecca2aad
KH
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
3b4f4446
KH
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
177c0ea7 418
ecca2aad
KH
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 421 }
8ac5a9cc 422 }
3263d5a2 423
ecca2aad 424 if (control_flag == 0)
4ed46869 425 {
3263d5a2
KH
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 429 }
ecca2aad
KH
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
4ed46869
KH
435}
436
12bcae05 437
3263d5a2
KH
438/* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
12bcae05 440
3263d5a2
KH
441static INLINE unsigned
442read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
12bcae05 445{
3263d5a2
KH
446 int c;
447 unsigned n;
12bcae05 448
3263d5a2
KH
449 while ((c = getc (fp)) != EOF)
450 {
69f8de5b 451 if (c == '#')
3263d5a2
KH
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
8f924df7 460 }
3263d5a2
KH
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
e9ce014c
KH
475 if (c != EOF)
476 ungetc (c, fp);
3263d5a2
KH
477 return n;
478}
12bcae05 479
537efd8d 480
3263d5a2 481/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
482 Each line of MAPFILE has this form
483 0xAAAA 0xCCCC
484 where 0xAAAA is a code-point and 0xCCCC is the corresponding
485 character code, or this form
486 0xAAAA-0xBBBB 0xCCCC
487 where 0xAAAA and 0xBBBB are code-points specifying a range, and
488 0xCCCC is the first character code of the range.
4ed46869 489
3263d5a2
KH
490 The returned vector has this form:
491 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
492 where CODE1 is a code-point or a cons of code-points specifying a
493 range. */
4ed46869 494
c449997d 495extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
4ed46869 496
e9ce014c
KH
497static void
498load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
499 struct charset *charset;
500 Lisp_Object mapfile;
e9ce014c 501 int control_flag;
4ed46869 502{
e9ce014c
KH
503 unsigned min_code = CHARSET_MIN_CODE (charset);
504 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
505 int fd;
506 FILE *fp;
3263d5a2
KH
507 int eof;
508 Lisp_Object suffixes;
e9ce014c
KH
509 struct charset_map_entries *head, *entries;
510 int n_entries;
4ed46869 511
3263d5a2
KH
512 suffixes = Fcons (build_string (".map"),
513 Fcons (build_string (".TXT"), Qnil));
4ed46869 514
4beef065 515 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
3263d5a2
KH
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 {
519 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 520 return;
3263d5a2 521 }
4ed46869 522
e9ce014c
KH
523 head = entries = ((struct charset_map_entries *)
524 alloca (sizeof (struct charset_map_entries)));
525 n_entries = 0;
3263d5a2
KH
526 eof = 0;
527 while (1)
528 {
e9ce014c
KH
529 unsigned from, to;
530 int c;
531 int idx;
4ed46869 532
e9ce014c 533 from = read_hex (fp, &eof);
3263d5a2
KH
534 if (eof)
535 break;
e9ce014c
KH
536 if (getc (fp) == '-')
537 to = read_hex (fp, &eof);
538 else
539 to = from;
540 c = (int) read_hex (fp, &eof);
ac4137cc 541
e9ce014c
KH
542 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
543 continue;
ac4137cc 544
e9ce014c 545 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 546 {
e9ce014c
KH
547 entries->next = ((struct charset_map_entries *)
548 alloca (sizeof (struct charset_map_entries)));
549 entries = entries->next;
3263d5a2 550 }
e9ce014c
KH
551 idx = n_entries % 0x10000;
552 entries->entry[idx].from = from;
553 entries->entry[idx].to = to;
554 entries->entry[idx].c = c;
555 n_entries++;
3263d5a2
KH
556 }
557 fclose (fp);
558 close (fd);
177c0ea7 559
e9ce014c 560 load_charset_map (charset, head, n_entries, control_flag);
4ed46869
KH
561}
562
e9ce014c
KH
563static void
564load_charset_map_from_vector (charset, vec, control_flag)
565 struct charset *charset;
566 Lisp_Object vec;
567 int control_flag;
23d2a7f1 568{
e9ce014c
KH
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
23d2a7f1 575
e9ce014c 576 if (len % 2 == 1)
3263d5a2 577 {
e9ce014c
KH
578 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
579 return;
3263d5a2 580 }
35e623fb 581
e9ce014c
KH
582 head = entries = ((struct charset_map_entries *)
583 alloca (sizeof (struct charset_map_entries)));
584 n_entries = 0;
585 for (i = 0; i < len; i += 2)
35e623fb 586 {
e9ce014c
KH
587 Lisp_Object val, val2;
588 unsigned from, to;
589 int c;
590 int idx;
d2665018 591
e9ce014c
KH
592 val = AREF (vec, i);
593 if (CONSP (val))
bbf12bb3 594 {
e9ce014c
KH
595 val2 = XCDR (val);
596 val = XCAR (val);
597 CHECK_NATNUM (val);
598 CHECK_NATNUM (val2);
599 from = XFASTINT (val);
600 to = XFASTINT (val2);
bbf12bb3 601 }
e9ce014c 602 else
bbf12bb3 603 {
e9ce014c
KH
604 CHECK_NATNUM (val);
605 from = to = XFASTINT (val);
bbf12bb3 606 }
e9ce014c
KH
607 val = AREF (vec, i + 1);
608 CHECK_NATNUM (val);
609 c = XFASTINT (val);
76d7b829 610
e9ce014c
KH
611 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
612 continue;
76d7b829 613
dcc694d7 614 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c
KH
615 {
616 entries->next = ((struct charset_map_entries *)
617 alloca (sizeof (struct charset_map_entries)));
618 entries = entries->next;
619 }
620 idx = n_entries % 0x10000;
621 entries->entry[idx].from = from;
622 entries->entry[idx].to = to;
623 entries->entry[idx].c = c;
624 n_entries++;
625 }
76d7b829 626
e9ce014c 627 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
628}
629
ecca2aad
KH
630
631/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
632 map it is (see the comment of load_charset_map for the detail). */
633
3263d5a2 634static void
ecca2aad 635load_charset (charset, control_flag)
3263d5a2 636 struct charset *charset;
ecca2aad 637 int control_flag;
76d7b829 638{
ecca2aad 639 Lisp_Object map;
76d7b829 640
ecca2aad
KH
641 if (inhibit_load_charset_map
642 && temp_charset_work
643 && charset == temp_charset_work->current
644 && (control_flag == 2 == temp_charset_work->for_encoder))
645 return;
646
647 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
648 map = CHARSET_MAP (charset);
649 else if (CHARSET_UNIFIED_P (charset))
650 map = CHARSET_UNIFY_MAP (charset);
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 655}
76d7b829 656
3263d5a2
KH
657
658DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
659 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
660 (object)
661 Lisp_Object object;
23d2a7f1 662{
3263d5a2 663 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
664}
665
4ed46869 666
ecca2aad
KH
667void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned from, unsigned to));
670
671void
672map_charset_for_dump (c_function, function, arg, from, to)
673 void (*c_function) (Lisp_Object, Lisp_Object);
674 Lisp_Object function, arg;
675 unsigned from, to;
676{
677 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
678 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
679 Lisp_Object range;
680 int c, stop;
681 struct gcpro gcpro1;
682
683 range = Fcons (Qnil, Qnil);
684 GCPRO1 (range);
685
686 c = temp_charset_work->min_char;
687 stop = (temp_charset_work->max_char < 0x20000
688 ? temp_charset_work->max_char : 0xFFFF);
689
690 while (1)
691 {
692 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
693
694 if (index >= from_idx && index <= to_idx)
695 {
696 if (NILP (XCAR (range)))
697 XSETCAR (range, make_number (c));
698 }
699 else if (! NILP (XCAR (range)))
700 {
701 XSETCDR (range, make_number (c - 1));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 XSETCAR (range, Qnil);
707 }
708 if (c == stop)
709 {
710 if (c == temp_charset_work->max_char)
711 {
712 if (! NILP (XCAR (range)))
713 {
714 XSETCDR (range, make_number (c));
715 if (c_function)
716 (*c_function) (arg, range);
717 else
718 call2 (function, range, arg);
719 }
720 break;
721 }
722 c = 0x1FFFF;
723 stop = temp_charset_work->max_char;
724 }
725 c++;
726 }
727}
728
4ed46869 729void
374c5cfd
KH
730map_charset_chars (c_function, function, arg,
731 charset, from, to)
732 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
733 Lisp_Object function, arg;
734 struct charset *charset;
735 unsigned from, to;
4ed46869 736{
3263d5a2 737 Lisp_Object range;
374c5cfd 738 int partial;
3263d5a2 739
374c5cfd
KH
740 partial = (from > CHARSET_MIN_CODE (charset)
741 || to < CHARSET_MAX_CODE (charset));
742
3263d5a2 743 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 744 {
374c5cfd
KH
745 int from_idx = CODE_POINT_TO_INDEX (charset, from);
746 int to_idx = CODE_POINT_TO_INDEX (charset, to);
747 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
748 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
749
ecca2aad
KH
750 if (CHARSET_UNIFIED_P (charset))
751 {
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_DEUNIFIER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
760 }
761
374c5cfd 762 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 763 if (NILP (function))
5af5dd92 764 (*c_function) (arg, range);
3263d5a2
KH
765 else
766 call2 (function, range, arg);
c83ef371 767 }
3263d5a2
KH
768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
769 {
770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_ENCODER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 778 }
374c5cfd 779 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 780 {
374c5cfd
KH
781 Lisp_Object subset_info;
782 int offset;
783
784 subset_info = CHARSET_SUBSET (charset);
785 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
786 offset = XINT (AREF (subset_info, 3));
787 from -= offset;
788 if (from < XFASTINT (AREF (subset_info, 1)))
789 from = XFASTINT (AREF (subset_info, 1));
790 to -= offset;
791 if (to > XFASTINT (AREF (subset_info, 2)))
792 to = XFASTINT (AREF (subset_info, 2));
793 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 794 }
374c5cfd
KH
795 else /* i.e. CHARSET_METHOD_SUPERSET */
796 {
797 Lisp_Object parents;
4ed46869 798
374c5cfd
KH
799 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
800 parents = XCDR (parents))
bbf12bb3 801 {
374c5cfd
KH
802 int offset;
803 unsigned this_from, this_to;
804
805 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
806 offset = XINT (XCDR (XCAR (parents)));
807 this_from = from - offset;
808 this_to = to - offset;
809 if (this_from < CHARSET_MIN_CODE (charset))
810 this_from = CHARSET_MIN_CODE (charset);
811 if (this_to > CHARSET_MAX_CODE (charset))
812 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
813 map_charset_chars (c_function, function, arg, charset,
814 this_from, this_to);
bbf12bb3 815 }
35e623fb 816 }
4ed46869
KH
817}
818
374c5cfd 819DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 820 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 821FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 822argument ARG.
4ed46869 823
374c5cfd
KH
824RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
825characters contained in CHARSET.
4ed46869 826
374c5cfd 827The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
8f924df7 828range of code points of target characters. */)
374c5cfd
KH
829 (function, charset, arg, from_code, to_code)
830 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 831{
374c5cfd 832 struct charset *cs;
16fed1fc 833 unsigned from, to;
4ed46869 834
374c5cfd
KH
835 CHECK_CHARSET_GET_CHARSET (charset, cs);
836 if (NILP (from_code))
16fed1fc 837 from = CHARSET_MIN_CODE (cs);
970b7474 838 else
4ed46869 839 {
970b7474
KH
840 CHECK_NATNUM (from_code);
841 from = XINT (from_code);
842 if (from < CHARSET_MIN_CODE (cs))
843 from = CHARSET_MIN_CODE (cs);
4ed46869 844 }
374c5cfd 845 if (NILP (to_code))
970b7474 846 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
847 else
848 {
970b7474
KH
849 CHECK_NATNUM (to_code);
850 to = XINT (to_code);
851 if (to > CHARSET_MAX_CODE (cs))
852 to = CHARSET_MAX_CODE (cs);
4ed46869 853 }
16fed1fc 854 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 855 return Qnil;
35e623fb 856}
4ed46869 857
4ed46869 858
3263d5a2
KH
859/* Define a charset according to the arguments. The Nth argument is
860 the Nth attribute of the charset (the last attribute `charset-id'
861 is not included). See the docstring of `define-charset' for the
862 detail. */
4ed46869 863
3263d5a2
KH
864DEFUN ("define-charset-internal", Fdefine_charset_internal,
865 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
866 doc: /* For internal use only.
867usage: (define-charset-internal ...) */)
3263d5a2
KH
868 (nargs, args)
869 int nargs;
870 Lisp_Object *args;
4ed46869 871{
3263d5a2
KH
872 /* Charset attr vector. */
873 Lisp_Object attrs;
874 Lisp_Object val;
875 unsigned hash_code;
876 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 877 int i, j;
3263d5a2
KH
878 struct charset charset;
879 int id;
880 int dimension;
881 int new_definition_p;
882 int nchars;
883
884 if (nargs != charset_arg_max)
885 return Fsignal (Qwrong_number_of_arguments,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs)));
888
889 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
890
891 CHECK_SYMBOL (args[charset_arg_name]);
892 ASET (attrs, charset_name, args[charset_arg_name]);
893
894 val = args[charset_arg_code_space];
895 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 896 {
3263d5a2
KH
897 int min_byte, max_byte;
898
899 min_byte = XINT (Faref (val, make_number (i * 2)));
900 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
901 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
902 error ("Invalid :code-space value");
903 charset.code_space[i * 4] = min_byte;
904 charset.code_space[i * 4 + 1] = max_byte;
905 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
906 nchars *= charset.code_space[i * 4 + 2];
907 charset.code_space[i * 4 + 3] = nchars;
908 if (max_byte > 0)
909 dimension = i + 1;
910 }
4ed46869 911
3263d5a2
KH
912 val = args[charset_arg_dimension];
913 if (NILP (val))
914 charset.dimension = dimension;
915 else
4ed46869 916 {
3263d5a2
KH
917 CHECK_NATNUM (val);
918 charset.dimension = XINT (val);
919 if (charset.dimension < 1 || charset.dimension > 4)
920 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
921 }
922
3263d5a2
KH
923 charset.code_linear_p
924 = (charset.dimension == 1
925 || (charset.code_space[2] == 256
926 && (charset.dimension == 2
927 || (charset.code_space[6] == 256
928 && (charset.dimension == 3
929 || charset.code_space[10] == 256)))));
930
69f8de5b 931 if (! charset.code_linear_p)
4ed46869 932 {
69f8de5b 933 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 934 bzero (charset.code_space_mask, 256);
69f8de5b
KH
935 for (i = 0; i < 4; i++)
936 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
937 j++)
938 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
939 }
940
3263d5a2 941 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 942
3263d5a2
KH
943 charset.min_code = (charset.code_space[0]
944 | (charset.code_space[4] << 8)
945 | (charset.code_space[8] << 16)
946 | (charset.code_space[12] << 24));
947 charset.max_code = (charset.code_space[1]
948 | (charset.code_space[5] << 8)
949 | (charset.code_space[9] << 16)
950 | (charset.code_space[13] << 24));
820ee249 951 charset.char_index_offset = 0;
8a73a704 952
820ee249
KH
953 val = args[charset_arg_min_code];
954 if (! NILP (val))
955 {
956 unsigned code;
fdb82f93 957
820ee249
KH
958 if (INTEGERP (val))
959 code = XINT (val);
960 else
961 {
962 CHECK_CONS (val);
8f924df7
KH
963 CHECK_NUMBER_CAR (val);
964 CHECK_NUMBER_CDR (val);
820ee249
KH
965 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
966 }
967 if (code < charset.min_code
968 || code > charset.max_code)
969 args_out_of_range_3 (make_number (charset.min_code),
970 make_number (charset.max_code), val);
971 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
972 charset.min_code = code;
973 }
3fac5a51 974
820ee249
KH
975 val = args[charset_arg_max_code];
976 if (! NILP (val))
3fac5a51 977 {
820ee249
KH
978 unsigned code;
979
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
983 {
984 CHECK_CONS (val);
8f924df7
KH
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
820ee249
KH
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
988 }
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.max_code = code;
3fac5a51 994 }
3fac5a51 995
ecca2aad 996 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 997
3263d5a2
KH
998 val = args[charset_arg_invalid_code];
999 if (NILP (val))
1000 {
1001 if (charset.min_code > 0)
1002 charset.invalid_code = 0;
bbf12bb3
KH
1003 else
1004 {
3263d5a2
KH
1005 XSETINT (val, charset.max_code + 1);
1006 if (XINT (val) == charset.max_code + 1)
1007 charset.invalid_code = charset.max_code + 1;
1008 else
1009 error ("Attribute :invalid-code must be specified");
76d7b829 1010 }
76d7b829 1011 }
3263d5a2
KH
1012 else
1013 {
1014 CHECK_NATNUM (val);
1015 charset.invalid_code = XFASTINT (val);
1016 }
4ed46869 1017
3263d5a2
KH
1018 val = args[charset_arg_iso_final];
1019 if (NILP (val))
1020 charset.iso_final = -1;
1021 else
1022 {
1023 CHECK_NUMBER (val);
1024 if (XINT (val) < '0' || XINT (val) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val));
1026 charset.iso_final = XINT (val);
1027 }
4ed46869 1028
3263d5a2
KH
1029 val = args[charset_arg_iso_revision];
1030 if (NILP (val))
1031 charset.iso_revision = -1;
1032 else
4ed46869 1033 {
3263d5a2
KH
1034 CHECK_NUMBER (val);
1035 if (XINT (val) > 63)
1036 args_out_of_range (make_number (63), val);
1037 charset.iso_revision = XINT (val);
4ed46869 1038 }
4ed46869 1039
3263d5a2
KH
1040 val = args[charset_arg_emacs_mule_id];
1041 if (NILP (val))
1042 charset.emacs_mule_id = -1;
4ed46869
KH
1043 else
1044 {
3263d5a2
KH
1045 CHECK_NATNUM (val);
1046 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val));
1048 charset.emacs_mule_id = XINT (val);
c83ef371 1049 }
f6302ac9 1050
3263d5a2 1051 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1052
3263d5a2 1053 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1054
3263d5a2
KH
1055 charset.unified_p = 0;
1056
1057 bzero (charset.fast_map, sizeof (charset.fast_map));
1058
1059 if (! NILP (args[charset_arg_code_offset]))
1060 {
1061 val = args[charset_arg_code_offset];
1062 CHECK_NUMBER (val);
1063
1064 charset.method = CHARSET_METHOD_OFFSET;
1065 charset.code_offset = XINT (val);
1066
1067 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1068 charset.min_char = i + charset.code_offset;
1069 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1070 charset.max_char = i + charset.code_offset;
1071 if (charset.max_char > MAX_CHAR)
1072 error ("Unsupported max char: %d", charset.max_char);
1073
f148205f
KH
1074 i = (charset.min_char >> 7) << 7;
1075 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1076 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1077 i = (i >> 12) << 12;
3263d5a2
KH
1078 for (; i <= charset.max_char; i += 0x1000)
1079 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1080 }
1081 else if (! NILP (args[charset_arg_map]))
1082 {
1083 val = args[charset_arg_map];
1084 ASET (attrs, charset_map, val);
ecca2aad 1085 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1086 }
374c5cfd 1087 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1088 {
374c5cfd
KH
1089 Lisp_Object parent;
1090 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1091 struct charset *parent_charset;
1092
1093 val = args[charset_arg_subset];
1094 parent = Fcar (val);
1095 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1096 parent_min_code = Fnth (make_number (1), val);
1097 CHECK_NATNUM (parent_min_code);
1098 parent_max_code = Fnth (make_number (2), val);
1099 CHECK_NATNUM (parent_max_code);
1100 parent_code_offset = Fnth (make_number (3), val);
1101 CHECK_NUMBER (parent_code_offset);
1102 val = Fmake_vector (make_number (4), Qnil);
1103 ASET (val, 0, make_number (parent_charset->id));
1104 ASET (val, 1, parent_min_code);
1105 ASET (val, 2, parent_max_code);
1106 ASET (val, 3, parent_code_offset);
1107 ASET (attrs, charset_subset, val);
1108
1109 charset.method = CHARSET_METHOD_SUBSET;
1110 /* Here, we just copy the parent's fast_map. It's not accurate,
1111 but at least it works for quickly detecting which character
1112 DOESN'T belong to this charset. */
1113 for (i = 0; i < 190; i++)
1114 charset.fast_map[i] = parent_charset->fast_map[i];
1115
1116 /* We also copy these for parents. */
1117 charset.min_char = parent_charset->min_char;
1118 charset.max_char = parent_charset->max_char;
1119 }
1120 else if (! NILP (args[charset_arg_superset]))
0282eb69 1121 {
374c5cfd
KH
1122 val = args[charset_arg_superset];
1123 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1124 val = Fcopy_sequence (val);
374c5cfd 1125 ASET (attrs, charset_superset, val);
3263d5a2
KH
1126
1127 charset.min_char = MAX_CHAR;
1128 charset.max_char = 0;
1129 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1130 {
3263d5a2
KH
1131 Lisp_Object elt, car_part, cdr_part;
1132 int this_id, offset;
1133 struct charset *this_charset;
2e344af3 1134
3263d5a2
KH
1135 elt = Fcar (val);
1136 if (CONSP (elt))
2e344af3 1137 {
3263d5a2
KH
1138 car_part = XCAR (elt);
1139 cdr_part = XCDR (elt);
1140 CHECK_CHARSET_GET_ID (car_part, this_id);
1141 CHECK_NUMBER (cdr_part);
1142 offset = XINT (cdr_part);
177c0ea7 1143 }
3263d5a2 1144 else
4ed46869 1145 {
3263d5a2
KH
1146 CHECK_CHARSET_GET_ID (elt, this_id);
1147 offset = 0;
4ed46869 1148 }
3263d5a2
KH
1149 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1150
1151 this_charset = CHARSET_FROM_ID (this_id);
1152 if (charset.min_char > this_charset->min_char)
1153 charset.min_char = this_charset->min_char;
1154 if (charset.max_char < this_charset->max_char)
1155 charset.max_char = this_charset->max_char;
1156 for (i = 0; i < 190; i++)
1157 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1158 }
0282eb69 1159 }
2e344af3 1160 else
3263d5a2 1161 error ("None of :code-offset, :map, :parents are specified");
05505664 1162
3263d5a2
KH
1163 val = args[charset_arg_unify_map];
1164 if (! NILP (val) && !STRINGP (val))
1165 CHECK_VECTOR (val);
1166 ASET (attrs, charset_unify_map, val);
05505664 1167
3263d5a2
KH
1168 CHECK_LIST (args[charset_arg_plist]);
1169 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1170
3263d5a2
KH
1171 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1172 &hash_code);
1173 if (charset.hash_index >= 0)
1174 {
1175 new_definition_p = 0;
4f65af01 1176 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1177 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1178 }
1a45ff10 1179 else
3263d5a2
KH
1180 {
1181 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1182 hash_code);
1183 if (charset_table_used == charset_table_size)
1184 {
2fe1edd1
KH
1185 struct charset *new_table
1186 = (struct charset *) xmalloc (sizeof (struct charset)
1187 * (charset_table_size + 16));
1188 bcopy (charset_table, new_table,
1189 sizeof (struct charset) * charset_table_size);
1190 charset_table_size += 16;
1191 charset_table = new_table;
3263d5a2
KH
1192 }
1193 id = charset_table_used++;
3263d5a2
KH
1194 new_definition_p = 1;
1195 }
2e344af3 1196
4f65af01 1197 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1198 charset.id = id;
1199 charset_table[id] = charset;
2e344af3 1200
ecca2aad
KH
1201 if (charset.method == CHARSET_METHOD_MAP)
1202 load_charset (&charset, 0);
1203
3263d5a2 1204 if (charset.iso_final >= 0)
4ed46869 1205 {
3263d5a2
KH
1206 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1207 charset.iso_final) = id;
1208 if (new_definition_p)
1209 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1210 Fcons (make_number (id), Qnil));
7c7dceee
KH
1211 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1212 charset_jisx0201_roman = id;
1213 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1214 charset_jisx0208_1978 = id;
1215 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1216 charset_jisx0208 = id;
4ed46869 1217 }
d0cf2d48 1218
3263d5a2 1219 if (charset.emacs_mule_id >= 0)
4ed46869 1220 {
3263d5a2 1221 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1222 if (charset.emacs_mule_id < 0xA0)
1223 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1224 else
1225 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1226 if (new_definition_p)
1227 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1228 Fcons (make_number (id), Qnil));
4ed46869
KH
1229 }
1230
3263d5a2
KH
1231 if (new_definition_p)
1232 {
1233 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1234 if (charset.supplementary_p)
1235 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1236 Fcons (make_number (id), Qnil));
1237 else
880820fe
KH
1238 {
1239 Lisp_Object tail;
1240
1241 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1242 {
1243 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1244
1245 if (cs->supplementary_p)
1246 break;
1247 }
1248 if (EQ (tail, Vcharset_ordered_list))
1249 Vcharset_ordered_list = Fcons (make_number (id),
1250 Vcharset_ordered_list);
1251 else if (NILP (tail))
1252 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1253 Fcons (make_number (id), Qnil));
1254 else
1255 {
1256 val = Fcons (XCAR (tail), XCDR (tail));
1257 XSETCDR (tail, val);
1258 XSETCAR (tail, make_number (id));
1259 }
1260 }
dbbb237d 1261 charset_ordered_list_tick++;
3263d5a2 1262 }
4ed46869 1263
3263d5a2 1264 return Qnil;
4ed46869
KH
1265}
1266
2fe1edd1
KH
1267
1268/* Same as Fdefine_charset_internal but arguments are more convenient
1269 to call from C (typically in syms_of_charset). This can define a
1270 charset of `offset' method only. Return the ID of the new
1271 charset. */
1272
1273static int
1274define_charset_internal (name, dimension, code_space, min_code, max_code,
1275 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1276 ascii_compatible, supplementary,
2fe1edd1
KH
1277 code_offset)
1278 Lisp_Object name;
1279 int dimension;
1280 unsigned char *code_space;
1281 unsigned min_code, max_code;
1282 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1283 int ascii_compatible, supplementary;
2fe1edd1
KH
1284 int code_offset;
1285{
1286 Lisp_Object args[charset_arg_max];
1287 Lisp_Object plist[14];
1288 Lisp_Object val;
1289 int i;
1290
1291 args[charset_arg_name] = name;
1292 args[charset_arg_dimension] = make_number (dimension);
1293 val = Fmake_vector (make_number (8), make_number (0));
1294 for (i = 0; i < 8; i++)
1295 ASET (val, i, make_number (code_space[i]));
1296 args[charset_arg_code_space] = val;
1297 args[charset_arg_min_code] = make_number (min_code);
1298 args[charset_arg_max_code] = make_number (max_code);
1299 args[charset_arg_iso_final]
1300 = (iso_final < 0 ? Qnil : make_number (iso_final));
1301 args[charset_arg_iso_revision] = make_number (iso_revision);
1302 args[charset_arg_emacs_mule_id]
1303 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1304 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1305 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1306 args[charset_arg_invalid_code] = Qnil;
1307 args[charset_arg_code_offset] = make_number (code_offset);
1308 args[charset_arg_map] = Qnil;
1309 args[charset_arg_subset] = Qnil;
1310 args[charset_arg_superset] = Qnil;
1311 args[charset_arg_unify_map] = Qnil;
1312
1313 plist[0] = intern (":name");
1314 plist[1] = args[charset_arg_name];
1315 plist[2] = intern (":dimension");
1316 plist[3] = args[charset_arg_dimension];
1317 plist[4] = intern (":code-space");
1318 plist[5] = args[charset_arg_code_space];
1319 plist[6] = intern (":iso-final-char");
1320 plist[7] = args[charset_arg_iso_final];
1321 plist[8] = intern (":emacs-mule-id");
1322 plist[9] = args[charset_arg_emacs_mule_id];
1323 plist[10] = intern (":ascii-compatible-p");
1324 plist[11] = args[charset_arg_ascii_compatible_p];
1325 plist[12] = intern (":code-offset");
1326 plist[13] = args[charset_arg_code_offset];
1327
1328 args[charset_arg_plist] = Flist (14, plist);
1329 Fdefine_charset_internal (charset_arg_max, args);
1330
1331 return XINT (CHARSET_SYMBOL_ID (name));
1332}
1333
1334
3263d5a2
KH
1335DEFUN ("define-charset-alias", Fdefine_charset_alias,
1336 Sdefine_charset_alias, 2, 2, 0,
1337 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1338 (alias, charset)
1339 Lisp_Object alias, charset;
4ed46869 1340{
3263d5a2
KH
1341 Lisp_Object attr;
1342
1343 CHECK_CHARSET_GET_ATTR (charset, attr);
1344 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1345 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1346 return Qnil;
1347}
4ed46869 1348
4ed46869 1349
3263d5a2 1350DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1351 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1352 (charset)
1353 Lisp_Object charset;
1354{
1355 Lisp_Object attrs;
1356
1357 CHECK_CHARSET_GET_ATTR (charset, attrs);
1358 return CHARSET_ATTR_PLIST (attrs);
1359}
1360
1361
1362DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1363 doc: /* Set CHARSET's property list to PLIST. */)
1364 (charset, plist)
1365 Lisp_Object charset, plist;
1366{
1367 Lisp_Object attrs;
1368
1369 CHECK_CHARSET_GET_ATTR (charset, attrs);
1370 CHARSET_ATTR_PLIST (attrs) = plist;
1371 return plist;
1372}
1373
1374
dbbb237d 1375DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1376 doc: /* Unify characters of CHARSET with Unicode.
1377This means reading the relevant file and installing the table defined
dbbb237d
KH
1378by CHARSET's `:unify-map' property.
1379
64165ae2
DL
1380Optional second arg UNIFY-MAP is a file name string or a vector. It has
1381the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1382`define-charset' (which see).
1383
1384Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1385 (charset, unify_map, deunify)
1386 Lisp_Object charset, unify_map, deunify;
8a73a704 1387{
3263d5a2
KH
1388 int id;
1389 struct charset *cs;
8f924df7 1390
3263d5a2
KH
1391 CHECK_CHARSET_GET_ID (charset, id);
1392 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1393 if (NILP (deunify)
1394 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1395 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1396 return Qnil;
dbbb237d 1397
3263d5a2 1398 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1399 if (NILP (deunify))
1400 {
ecca2aad
KH
1401 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1402 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1403 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1404 if (NILP (unify_map))
1405 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1406 else
ecca2aad
KH
1407 {
1408 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1409 signal_error ("Bad unify-map", unify_map);
1410 CHARSET_UNIFY_MAP (cs) = unify_map;
1411 }
1412 if (NILP (Vchar_unify_table))
1413 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1414 char_table_set_range (Vchar_unify_table,
1415 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1416 CHARSET_UNIFIED_P (cs) = 1;
1417 }
1418 else if (CHAR_TABLE_P (Vchar_unify_table))
1419 {
1420 int min_code = CHARSET_MIN_CODE (cs);
1421 int max_code = CHARSET_MAX_CODE (cs);
1422 int min_char = DECODE_CHAR (cs, min_code);
1423 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1424
dbbb237d
KH
1425 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1426 }
8f924df7 1427
3263d5a2 1428 return Qnil;
8a73a704
KH
1429}
1430
3fac5a51
KH
1431DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1432 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1433 doc: /*
d0cf2d48 1434Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1435DIMENSION is the number of bytes to represent a character: 1 or 2.
1436CHARS is the number of characters in a dimension: 94 or 96.
1437
1438This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1439If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1440return nil. */)
1441 (dimension, chars)
3fac5a51
KH
1442 Lisp_Object dimension, chars;
1443{
1444 int final_char;
1445
b7826503
PJ
1446 CHECK_NUMBER (dimension);
1447 CHECK_NUMBER (chars);
3263d5a2
KH
1448 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1449 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1450 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1451 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1452 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1453 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1454 break;
3fac5a51
KH
1455 return (final_char <= '?' ? make_number (final_char) : Qnil);
1456}
1457
3263d5a2
KH
1458static void
1459check_iso_charset_parameter (dimension, chars, final_char)
1460 Lisp_Object dimension, chars, final_char;
4ed46869 1461{
3263d5a2
KH
1462 CHECK_NATNUM (dimension);
1463 CHECK_NATNUM (chars);
1464 CHECK_NATNUM (final_char);
4ed46869 1465
3263d5a2
KH
1466 if (XINT (dimension) > 3)
1467 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1468 if (XINT (chars) != 94 && XINT (chars) != 96)
1469 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1470 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1471 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1472}
1473
1474
1475DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1476 4, 4, 0,
cefd8c4f
KH
1477 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1478
1479On decoding by an ISO-2022 base coding system, when a charset
1480specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1481if CHARSET is designated instead. */)
3263d5a2
KH
1482 (dimension, chars, final_char, charset)
1483 Lisp_Object dimension, chars, final_char, charset;
1484{
1485 int id;
82215ce9 1486 int chars_flag;
4ed46869 1487
3263d5a2
KH
1488 CHECK_CHARSET_GET_ID (charset, id);
1489 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1490 chars_flag = XINT (chars) == 96;
1491 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1492 return Qnil;
1493}
1494
3263d5a2 1495
2e344af3
KH
1496/* Return information about charsets in the text at PTR of NBYTES
1497 bytes, which are NCHARS characters. The value is:
f6302ac9 1498
cfe34140 1499 0: Each character is represented by one byte. This is always
3263d5a2
KH
1500 true for a unibyte string. For a multibyte string, true if
1501 it contains only ASCII characters.
1502
28c026cd
DL
1503 1: No charsets other than ascii, control-1, and latin-1 are
1504 found.
1d67c29b 1505
3263d5a2
KH
1506 2: Otherwise.
1507*/
4ed46869
KH
1508
1509int
3263d5a2
KH
1510string_xstring_p (string)
1511 Lisp_Object string;
4ed46869 1512{
8f924df7
KH
1513 const unsigned char *p = SDATA (string);
1514 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1515
8f924df7 1516 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1517 return 0;
1518
3263d5a2 1519 while (p < endp)
0282eb69 1520 {
3263d5a2 1521 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1522
3cc67a4d 1523 if (c >= 0x100)
3263d5a2 1524 return 2;
0282eb69 1525 }
3263d5a2
KH
1526 return 1;
1527}
05505664 1528
05505664 1529
3263d5a2 1530/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1531
3cc67a4d
KH
1532 CHARSETS is a vector. If Nth element is non-nil, it means the
1533 charset whose id is N is already found.
2e344af3 1534
3263d5a2 1535 It may lookup a translation table TABLE if supplied. */
2e344af3 1536
3263d5a2 1537static void
3cc67a4d 1538find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
dbbb237d 1539 const unsigned char *ptr;
42ca828e 1540 EMACS_INT nchars, nbytes;
3263d5a2 1541 Lisp_Object charsets, table;
3cc67a4d 1542 int multibyte;
3263d5a2 1543{
dbbb237d 1544 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1545
1546 if (nchars == nbytes)
3263d5a2 1547 {
3cc67a4d
KH
1548 if (multibyte)
1549 ASET (charsets, charset_ascii, Qt);
1550 else
1551 while (ptr < pend)
1552 {
1553 int c = *ptr++;
1554
1555 if (!NILP (table))
1556 c = translate_char (table, c);
1557 if (ASCII_BYTE_P (c))
1558 ASET (charsets, charset_ascii, Qt);
1559 else
1560 ASET (charsets, charset_eight_bit, Qt);
1561 }
1562 }
1563 else
1564 {
1565 while (ptr < pend)
3263d5a2 1566 {
3cc67a4d
KH
1567 int c = STRING_CHAR_ADVANCE (ptr);
1568 struct charset *charset;
3263d5a2 1569
3cc67a4d
KH
1570 if (!NILP (table))
1571 c = translate_char (table, c);
1572 charset = CHAR_CHARSET (c);
1573 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1574 }
4ed46869 1575 }
4ed46869
KH
1576}
1577
1578DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1579 2, 3, 0,
fdb82f93
PJ
1580 doc: /* Return a list of charsets in the region between BEG and END.
1581BEG and END are buffer positions.
1582Optional arg TABLE if non-nil is a translation table to look up.
1583
fdb82f93
PJ
1584If the current buffer is unibyte, the returned list may contain
1585only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1586 (beg, end, table)
23d2a7f1 1587 Lisp_Object beg, end, table;
4ed46869 1588{
3263d5a2 1589 Lisp_Object charsets;
42ca828e
DL
1590 EMACS_INT from, from_byte, to, stop, stop_byte;
1591 int i;
4ed46869 1592 Lisp_Object val;
3cc67a4d 1593 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1594
1595 validate_region (&beg, &end);
1596 from = XFASTINT (beg);
1597 stop = to = XFASTINT (end);
6ae1f27e 1598
4ed46869 1599 if (from < GPT && GPT < to)
6ae1f27e
RS
1600 {
1601 stop = GPT;
1602 stop_byte = GPT_BYTE;
1603 }
1604 else
1605 stop_byte = CHAR_TO_BYTE (stop);
1606
1607 from_byte = CHAR_TO_BYTE (from);
1608
3263d5a2 1609 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1610 while (1)
1611 {
3263d5a2 1612 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1613 stop_byte - from_byte, charsets, table,
1614 multibyte);
4ed46869 1615 if (stop < to)
6ae1f27e
RS
1616 {
1617 from = stop, from_byte = stop_byte;
1618 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1619 }
4ed46869
KH
1620 else
1621 break;
1622 }
6ae1f27e 1623
4ed46869 1624 val = Qnil;
3263d5a2 1625 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1626 if (!NILP (AREF (charsets, i)))
3263d5a2 1627 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1628 return val;
1629}
1630
1631DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1632 1, 2, 0,
fdb82f93
PJ
1633 doc: /* Return a list of charsets in STR.
1634Optional arg TABLE if non-nil is a translation table to look up.
1635
fdb82f93 1636If STR is unibyte, the returned list may contain
3263d5a2 1637only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1638 (str, table)
23d2a7f1 1639 Lisp_Object str, table;
4ed46869 1640{
3263d5a2 1641 Lisp_Object charsets;
4ed46869
KH
1642 int i;
1643 Lisp_Object val;
1644
b7826503 1645 CHECK_STRING (str);
87b089ad 1646
3263d5a2 1647 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1648 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1649 charsets, table,
1650 STRING_MULTIBYTE (str));
4ed46869 1651 val = Qnil;
3263d5a2 1652 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1653 if (!NILP (AREF (charsets, i)))
3263d5a2 1654 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1655 return val;
1656}
2e344af3 1657
4ed46869 1658\f
3263d5a2 1659
ecca2aad
KH
1660/* Return a unified character code for C (>= 0x110000). VAL is a
1661 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1662 charset symbol. */
1663int
1664maybe_unify_char (c, val)
1665 int c;
1666 Lisp_Object val;
1667{
1668 struct charset *charset;
1669
1670 if (INTEGERP (val))
1671 return XINT (val);
1672 if (NILP (val))
1673 return c;
1674
1675 CHECK_CHARSET_GET_CHARSET (val, charset);
1676 load_charset (charset, 1);
1677 if (! inhibit_load_charset_map)
1678 {
1679 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1680 if (! NILP (val))
1681 c = XINT (val);
1682 }
1683 else
1684 {
1685 int code_index = c - CHARSET_CODE_OFFSET (charset);
1686 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1687
1688 if (unified > 0)
1689 c = unified;
1690 }
1691 return c;
1692}
1693
1694
3263d5a2
KH
1695/* Return a character correponding to the code-point CODE of
1696 CHARSET. */
1697
1698int
1699decode_char (charset, code)
1700 struct charset *charset;
1701 unsigned code;
4ed46869 1702{
3263d5a2
KH
1703 int c, char_index;
1704 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1705
3263d5a2
KH
1706 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1707 return -1;
4ed46869 1708
374c5cfd 1709 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1710 {
374c5cfd
KH
1711 Lisp_Object subset_info;
1712
1713 subset_info = CHARSET_SUBSET (charset);
1714 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1715 code -= XINT (AREF (subset_info, 3));
1716 if (code < XFASTINT (AREF (subset_info, 1))
1717 || code > XFASTINT (AREF (subset_info, 2)))
1718 c = -1;
1719 else
1720 c = DECODE_CHAR (charset, code);
2e344af3 1721 }
374c5cfd 1722 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1723 {
3263d5a2 1724 Lisp_Object parents;
4ed46869 1725
374c5cfd 1726 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1727 c = -1;
1728 for (; CONSP (parents); parents = XCDR (parents))
1729 {
1730 int id = XINT (XCAR (XCAR (parents)));
1731 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1732 unsigned this_code = code - code_offset;
4ed46869 1733
3263d5a2
KH
1734 charset = CHARSET_FROM_ID (id);
1735 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1736 break;
1737 }
1738 }
1739 else
ac4137cc 1740 {
3263d5a2 1741 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1742 if (char_index < 0)
1743 return -1;
4ed46869 1744
3263d5a2 1745 if (method == CHARSET_METHOD_MAP)
ac4137cc 1746 {
3263d5a2 1747 Lisp_Object decoder;
4ed46869 1748
3263d5a2
KH
1749 decoder = CHARSET_DECODER (charset);
1750 if (! VECTORP (decoder))
ecca2aad
KH
1751 {
1752 load_charset (charset, 1);
1753 decoder = CHARSET_DECODER (charset);
1754 }
1755 if (VECTORP (decoder))
1756 c = XINT (AREF (decoder, char_index));
1757 else
1758 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1759 }
ecca2aad 1760 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1761 {
3263d5a2 1762 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1763 if (CHARSET_UNIFIED_P (charset)
1764 && c > MAX_UNICODE_CHAR)
1765 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1766 }
1767 }
4ed46869 1768
3263d5a2 1769 return c;
90d7b74e
KH
1770}
1771
374c5cfd
KH
1772/* Variable used temporarily by the macro ENCODE_CHAR. */
1773Lisp_Object charset_work;
4ed46869 1774
3263d5a2 1775/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1776 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1777 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1778
3263d5a2
KH
1779unsigned
1780encode_char (charset, c)
1781 struct charset *charset;
9b6a601f 1782 int c;
9d3d8cba 1783{
3263d5a2
KH
1784 unsigned code;
1785 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1786
3263d5a2 1787 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1788 {
374c5cfd 1789 Lisp_Object deunifier, deunified;
ecca2aad 1790 int code_index = -1;
4ed46869 1791
3263d5a2
KH
1792 deunifier = CHARSET_DEUNIFIER (charset);
1793 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1794 {
ecca2aad 1795 load_charset (charset, 2);
3263d5a2 1796 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1797 }
ecca2aad
KH
1798 if (CHAR_TABLE_P (deunifier))
1799 {
1800 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1801
1802 if (INTEGERP (deunified))
1803 code_index = XINT (deunified);
1804 }
1805 else
1806 {
1807 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1808 }
1809 if (code_index >= 0)
1810 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1811 }
9d3d8cba 1812
374c5cfd
KH
1813 if (method == CHARSET_METHOD_SUBSET)
1814 {
1815 Lisp_Object subset_info;
1816 struct charset *this_charset;
1817
1818 subset_info = CHARSET_SUBSET (charset);
1819 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1820 code = ENCODE_CHAR (this_charset, c);
1821 if (code == CHARSET_INVALID_CODE (this_charset)
1822 || code < XFASTINT (AREF (subset_info, 1))
1823 || code > XFASTINT (AREF (subset_info, 2)))
1824 return CHARSET_INVALID_CODE (charset);
1825 code += XINT (AREF (subset_info, 3));
1826 return code;
1827 }
9d3d8cba 1828
374c5cfd 1829 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1830 {
3263d5a2 1831 Lisp_Object parents;
d2665018 1832
374c5cfd 1833 parents = CHARSET_SUPERSET (charset);
3263d5a2 1834 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1835 {
3263d5a2
KH
1836 int id = XINT (XCAR (XCAR (parents)));
1837 int code_offset = XINT (XCDR (XCAR (parents)));
1838 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1839
3263d5a2 1840 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1841 if (code != CHARSET_INVALID_CODE (this_charset))
1842 return code + code_offset;
beeedaad 1843 }
3263d5a2
KH
1844 return CHARSET_INVALID_CODE (charset);
1845 }
1bcc1567 1846
15c85a88
KH
1847 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1848 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1849 return CHARSET_INVALID_CODE (charset);
1bcc1567 1850
3263d5a2 1851 if (method == CHARSET_METHOD_MAP)
3f62427c 1852 {
3263d5a2 1853 Lisp_Object encoder;
beeedaad 1854 Lisp_Object val;
9b6a601f 1855
3263d5a2
KH
1856 encoder = CHARSET_ENCODER (charset);
1857 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
1858 load_charset (charset);
1859 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1860 {
1861 val = CHAR_TABLE_REF (encoder, c);
1862 if (NILP (val))
1863 return CHARSET_INVALID_CODE (charset);
1864 code = XINT (val);
1865 if (! CHARSET_COMPACT_CODES_P (charset))
1866 code = INDEX_TO_CODE_POINT (charset, code);
1867 }
1868 else
1869 {
1870 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1871 code = INDEX_TO_CODE_POINT (charset, code);
1872 }
3263d5a2 1873 }
820ee249 1874 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1875 {
ecca2aad
KH
1876 int code_index = c - CHARSET_CODE_OFFSET (charset);
1877
1878 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1879 }
8ac5a9cc 1880
3263d5a2 1881 return code;
4ed46869
KH
1882}
1883
4ed46869 1884
3263d5a2
KH
1885DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1886 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1887Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1888
3263d5a2 1889CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1890
3263d5a2 1891Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1892and CODE-POINT to a character. Currently not supported and just ignored. */)
3263d5a2
KH
1893 (charset, code_point, restriction)
1894 Lisp_Object charset, code_point, restriction;
4ed46869 1895{
3263d5a2
KH
1896 int c, id;
1897 unsigned code;
1898 struct charset *charsetp;
859f2b3c 1899
3263d5a2
KH
1900 CHECK_CHARSET_GET_ID (charset, id);
1901 if (CONSP (code_point))
1902 {
8f924df7
KH
1903 CHECK_NATNUM_CAR (code_point);
1904 CHECK_NATNUM_CDR (code_point);
69f8de5b 1905 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1906 }
4ed46869
KH
1907 else
1908 {
3263d5a2
KH
1909 CHECK_NATNUM (code_point);
1910 code = XINT (code_point);
4ed46869 1911 }
3263d5a2
KH
1912 charsetp = CHARSET_FROM_ID (id);
1913 c = DECODE_CHAR (charsetp, code);
1914 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1915}
1916
859f2b3c 1917
3263d5a2
KH
1918DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1919 doc: /* Encode the character CH into a code-point of CHARSET.
1920Return nil if CHARSET doesn't include CH.
17e7ef1b 1921
d0cf2d48 1922Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2
KH
1923code-point in CCS. Currently not supported and just ignored. */)
1924 (ch, charset, restriction)
1925 Lisp_Object ch, charset, restriction;
4ed46869 1926{
16fed1fc 1927 int id;
3263d5a2
KH
1928 unsigned code;
1929 struct charset *charsetp;
046b1f03 1930
3263d5a2
KH
1931 CHECK_CHARSET_GET_ID (charset, id);
1932 CHECK_NATNUM (ch);
3263d5a2 1933 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1934 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1935 if (code == CHARSET_INVALID_CODE (charsetp))
1936 return Qnil;
1937 if (code > 0x7FFFFFF)
1938 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1939 return make_number (code);
beeedaad
KH
1940}
1941
beeedaad 1942
b121a744
KH
1943DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1944 doc:
1945 /* Return a character of CHARSET whose position codes are CODEn.
1946
1947CODE1 through CODE4 are optional, but if you don't supply sufficient
1948position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1949is specified. */)
b121a744
KH
1950 (charset, code1, code2, code3, code4)
1951 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1952{
3263d5a2
KH
1953 int id, dimension;
1954 struct charset *charsetp;
b121a744
KH
1955 unsigned code;
1956 int c;
87b089ad 1957
3263d5a2
KH
1958 CHECK_CHARSET_GET_ID (charset, id);
1959 charsetp = CHARSET_FROM_ID (id);
4ed46869 1960
b121a744
KH
1961 dimension = CHARSET_DIMENSION (charsetp);
1962 if (NILP (code1))
d47073ca
KH
1963 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1964 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1965 else
859f2b3c 1966 {
b121a744
KH
1967 CHECK_NATNUM (code1);
1968 if (XFASTINT (code1) >= 0x100)
1969 args_out_of_range (make_number (0xFF), code1);
1970 code = XFASTINT (code1);
859f2b3c 1971
b0a1e45e 1972 if (dimension > 1)
beeedaad 1973 {
b121a744 1974 code <<= 8;
b0a1e45e
KH
1975 if (NILP (code2))
1976 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1977 else
b121a744 1978 {
b0a1e45e
KH
1979 CHECK_NATNUM (code2);
1980 if (XFASTINT (code2) >= 0x100)
1981 args_out_of_range (make_number (0xFF), code2);
1982 code |= XFASTINT (code2);
b121a744 1983 }
99529c2c 1984
b0a1e45e 1985 if (dimension > 2)
b121a744
KH
1986 {
1987 code <<= 8;
b0a1e45e
KH
1988 if (NILP (code3))
1989 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1990 else
1991 {
b0a1e45e
KH
1992 CHECK_NATNUM (code3);
1993 if (XFASTINT (code3) >= 0x100)
1994 args_out_of_range (make_number (0xFF), code3);
1995 code |= XFASTINT (code3);
1996 }
1997
1998 if (dimension > 3)
1999 {
2000 code <<= 8;
2001 if (NILP (code4))
2002 code |= charsetp->code_space[0];
2003 else
2004 {
2005 CHECK_NATNUM (code4);
2006 if (XFASTINT (code4) >= 0x100)
2007 args_out_of_range (make_number (0xFF), code4);
2008 code |= XFASTINT (code4);
2009 }
b121a744
KH
2010 }
2011 }
beeedaad 2012 }
859f2b3c 2013 }
beeedaad 2014
b121a744
KH
2015 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2016 code &= 0x7F7F7F7F;
2017 c = DECODE_CHAR (charsetp, code);
2018 if (c < 0)
2019 error ("Invalid code(s)");
3263d5a2 2020 return make_number (c);
4ed46869
KH
2021}
2022
beeedaad 2023
3263d5a2
KH
2024/* Return the first charset in CHARSET_LIST that contains C.
2025 CHARSET_LIST is a list of charset IDs. If it is nil, use
2026 Vcharset_ordered_list. */
beeedaad 2027
3263d5a2
KH
2028struct charset *
2029char_charset (c, charset_list, code_return)
2030 int c;
2031 Lisp_Object charset_list;
2032 unsigned *code_return;
2e344af3 2033{
8a1816bb
KH
2034 int maybe_null = 0;
2035
3263d5a2
KH
2036 if (NILP (charset_list))
2037 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2038 else
2039 maybe_null = 1;
beeedaad 2040
6c652beb 2041 while (CONSP (charset_list))
2e344af3 2042 {
3263d5a2
KH
2043 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2044 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2045
3263d5a2 2046 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2047 {
3263d5a2
KH
2048 if (code_return)
2049 *code_return = code;
2050 return charset;
3f62427c 2051 }
3263d5a2 2052 charset_list = XCDR (charset_list);
6c652beb
KH
2053 if (c <= MAX_UNICODE_CHAR
2054 && EQ (charset_list, Vcharset_non_preferred_head))
2055 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2056 }
8a1816bb
KH
2057 return (maybe_null ? NULL
2058 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2059 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2060}
2061
2e344af3 2062
3263d5a2 2063DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2064 doc:
d0cf2d48 2065 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2066The charset is decided by the current priority order of charsets.
2067A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2068CH in the charset. */)
3263d5a2
KH
2069 (ch)
2070 Lisp_Object ch;
4ed46869 2071{
3263d5a2
KH
2072 struct charset *charset;
2073 int c, dimension;
2074 unsigned code;
4ed46869
KH
2075 Lisp_Object val;
2076
3263d5a2
KH
2077 CHECK_CHARACTER (ch);
2078 c = XFASTINT (ch);
2079 charset = CHAR_CHARSET (c);
2080 if (! charset)
3cc67a4d 2081 abort ();
3263d5a2
KH
2082 code = ENCODE_CHAR (charset, c);
2083 if (code == CHARSET_INVALID_CODE (charset))
2084 abort ();
2085 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2086 for (val = Qnil; dimension > 0; dimension--)
2087 {
2088 val = Fcons (make_number (code & 0xFF), val);
2089 code >>= 8;
2090 }
3263d5a2 2091 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2092}
2093
740f080d 2094
3263d5a2 2095DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
327719ee 2096 doc: /* Return the charset of highest priority that contains CH. */)
fdb82f93 2097 (ch)
4ed46869
KH
2098 Lisp_Object ch;
2099{
3263d5a2 2100 struct charset *charset;
4ed46869 2101
3263d5a2
KH
2102 CHECK_CHARACTER (ch);
2103 charset = CHAR_CHARSET (XINT (ch));
2104 return (CHARSET_NAME (charset));
4ed46869
KH
2105}
2106
17e7ef1b 2107
3263d5a2
KH
2108DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2109 doc: /*
2110Return charset of a character in the current buffer at position POS.
2111If POS is nil, it defauls to the current point.
2112If POS is out of range, the value is nil. */)
2113 (pos)
2114 Lisp_Object pos;
2e344af3 2115{
3263d5a2
KH
2116 Lisp_Object ch;
2117 struct charset *charset;
046b1f03 2118
3263d5a2
KH
2119 ch = Fchar_after (pos);
2120 if (! INTEGERP (ch))
2121 return ch;
2122 charset = CHAR_CHARSET (XINT (ch));
2123 return (CHARSET_NAME (charset));
6ae1f27e 2124}
9036eb45 2125
87b089ad 2126
3263d5a2
KH
2127DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2128 doc: /*
2129Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2130
2131ISO 2022's designation sequence (escape sequence) distinguishes charsets
2132by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2133whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2134See the documentation of the function `charset-info' for the meanings of
2135DIMENSION, CHARS, and FINAL-CHAR. */)
2136 (dimension, chars, final_char)
2137 Lisp_Object dimension, chars, final_char;
6ae1f27e 2138{
3263d5a2 2139 int id;
82215ce9 2140 int chars_flag;
a8a35e61 2141
3263d5a2 2142 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2143 chars_flag = XFASTINT (chars) == 96;
2144 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2145 XFASTINT (final_char));
2146 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2147}
2148
87b089ad 2149
3263d5a2
KH
2150DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2151 0, 0, 0,
2152 doc: /*
ecca2aad
KH
2153Internal use only.
2154Clear temporary charset mapping tables.
2155It should be called only from temacs invoked for dumping. */)
3263d5a2 2156 ()
87b089ad 2157{
53316e55 2158 int i;
3263d5a2
KH
2159 struct charset *charset;
2160 Lisp_Object attrs;
87b089ad 2161
ecca2aad 2162 if (temp_charset_work)
87b089ad 2163 {
ecca2aad
KH
2164 free (temp_charset_work);
2165 temp_charset_work = NULL;
2e344af3 2166 }
2e344af3 2167
ecca2aad
KH
2168 if (CHAR_TABLE_P (Vchar_unify_table))
2169 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2170
3263d5a2 2171 return Qnil;
740f080d
KH
2172}
2173
8ddf5e57
DL
2174DEFUN ("charset-priority-list", Fcharset_priority_list,
2175 Scharset_priority_list, 0, 1, 0,
2176 doc: /* Return the list of charsets ordered by priority.
2177HIGHESTP non-nil means just return the highest priority one. */)
2178 (highestp)
2179 Lisp_Object highestp;
2e344af3 2180{
8ddf5e57 2181 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2182
8ddf5e57 2183 if (!NILP (highestp))
16fed1fc 2184 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2185
8ddf5e57 2186 while (!NILP (list))
2e344af3 2187 {
16fed1fc 2188 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2189 list = XCDR (list);
2e344af3 2190 }
8ddf5e57 2191 return Fnreverse (val);
2e344af3
KH
2192}
2193
8ddf5e57
DL
2194DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2195 1, MANY, 0,
2196 doc: /* Assign higher priority to the charsets given as arguments.
2197usage: (set-charset-priority &rest charsets) */)
2198 (nargs, args)
2199 int nargs;
4ed46869
KH
2200 Lisp_Object *args;
2201{
af7c60ca 2202 Lisp_Object new_head, old_list, arglist[2];
321c819c 2203 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2204 int i, id;
4ed46869 2205
8ddf5e57 2206 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2207 new_head = Qnil;
8ddf5e57 2208 for (i = 0; i < nargs; i++)
4ed46869 2209 {
8ddf5e57 2210 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2211 if (! NILP (Fmemq (make_number (id), old_list)))
2212 {
2213 old_list = Fdelq (make_number (id), old_list);
2214 new_head = Fcons (make_number (id), new_head);
2215 }
5729c92f 2216 }
8ddf5e57 2217 arglist[0] = Fnreverse (new_head);
6a9c90ec 2218 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2219 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2220 charset_ordered_list_tick++;
5729c92f 2221
321c819c 2222 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2223 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2224 {
e77415b0 2225 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2226 list_2022 = Fcons (XCAR (old_list), list_2022);
2227 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2228 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
4ed46869 2229 }
321c819c
KH
2230 Viso_2022_charset_list = Fnreverse (list_2022);
2231 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
4ed46869 2232
8ddf5e57 2233 return Qnil;
4ed46869
KH
2234}
2235
d5b33309
KH
2236DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2237 0, 1, 0,
2238 doc: /* Internal use only.
2239Return charset identification number of CHARSET. */)
2240 (charset)
2241 Lisp_Object charset;
4ed46869 2242{
d5b33309 2243 int id;
4ed46869 2244
d5b33309
KH
2245 CHECK_CHARSET_GET_ID (charset, id);
2246 return make_number (id);
4ed46869
KH
2247}
2248
4ed46869 2249\f
3263d5a2
KH
2250void
2251init_charset ()
4ed46869 2252{
c8f94403
GM
2253 Lisp_Object tempdir;
2254 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2255 if (access (SDATA (tempdir), 0) < 0)
2256 {
2257 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2258Emacs will not function correctly without the character map files.\n\
2259Please check your installation!\n",
2260 tempdir);
2261 /* TODO should this be a fatal error? (Bug#909) */
2262 }
2263
2264 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2265}
2266
4ed46869 2267
dfcf069d 2268void
4ed46869
KH
2269init_charset_once ()
2270{
2271 int i, j, k;
2272
3263d5a2
KH
2273 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2274 for (j = 0; j < ISO_MAX_CHARS; j++)
2275 for (k = 0; k < ISO_MAX_FINAL; k++)
2276 iso_charset_table[i][j][k] = -1;
4ed46869 2277
60383934 2278 for (i = 0; i < 256; i++)
3263d5a2 2279 emacs_mule_charset[i] = NULL;
4ed46869 2280
7c7dceee
KH
2281 charset_jisx0201_roman = -1;
2282 charset_jisx0208_1978 = -1;
2283 charset_jisx0208 = -1;
4ed46869
KH
2284
2285 for (i = 0; i < 128; i++)
14e3d523 2286 unibyte_to_multibyte_table[i] = i;
4ed46869 2287 for (; i < 256; i++)
170e4589 2288 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
4ed46869
KH
2289}
2290
2291#ifdef emacs
2292
dfcf069d 2293void
4ed46869
KH
2294syms_of_charset ()
2295{
3263d5a2
KH
2296 DEFSYM (Qcharsetp, "charsetp");
2297
2298 DEFSYM (Qascii, "ascii");
2299 DEFSYM (Qunicode, "unicode");
6c652beb 2300 DEFSYM (Qemacs, "emacs");
2fe1edd1 2301 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2302 DEFSYM (Qiso_8859_1, "iso-8859-1");
2303
2304 DEFSYM (Qgl, "gl");
2305 DEFSYM (Qgr, "gr");
2306
3263d5a2
KH
2307 staticpro (&Vcharset_ordered_list);
2308 Vcharset_ordered_list = Qnil;
2309
2310 staticpro (&Viso_2022_charset_list);
2311 Viso_2022_charset_list = Qnil;
2312
2313 staticpro (&Vemacs_mule_charset_list);
2314 Vemacs_mule_charset_list = Qnil;
2315
3943ed76
KH
2316 /* Don't staticpro them here. It's done in syms_of_fns. */
2317 QCtest = intern (":test");
2318 Qeq = intern ("eq");
2319
3263d5a2 2320 staticpro (&Vcharset_hash_table);
8f924df7
KH
2321 {
2322 Lisp_Object args[2];
2323 args[0] = QCtest;
2324 args[1] = Qeq;
2325 Vcharset_hash_table = Fmake_hash_table (2, args);
2326 }
3263d5a2
KH
2327
2328 charset_table_size = 128;
2329 charset_table = ((struct charset *)
2330 xmalloc (sizeof (struct charset) * charset_table_size));
2331 charset_table_used = 0;
2332
3263d5a2
KH
2333 defsubr (&Scharsetp);
2334 defsubr (&Smap_charset_chars);
2335 defsubr (&Sdefine_charset_internal);
2336 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2337 defsubr (&Scharset_plist);
2338 defsubr (&Sset_charset_plist);
2339 defsubr (&Sunify_charset);
3fac5a51 2340 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2341 defsubr (&Sdeclare_equiv_charset);
2342 defsubr (&Sfind_charset_region);
2343 defsubr (&Sfind_charset_string);
3263d5a2
KH
2344 defsubr (&Sdecode_char);
2345 defsubr (&Sencode_char);
4ed46869 2346 defsubr (&Ssplit_char);
3263d5a2 2347 defsubr (&Smake_char);
4ed46869 2348 defsubr (&Schar_charset);
90d7b74e 2349 defsubr (&Scharset_after);
4ed46869 2350 defsubr (&Siso_charset);
3263d5a2 2351 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2352 defsubr (&Scharset_priority_list);
2353 defsubr (&Sset_charset_priority);
d5b33309 2354 defsubr (&Scharset_id_internal);
3263d5a2 2355
4beef065 2356 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2357 doc: /* *List of directories to search for charset map files. */);
4beef065 2358 Vcharset_map_path = Qnil;
4ed46869 2359
ecca2aad
KH
2360 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2361 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2362 inhibit_load_charset_map = 0;
2363
4ed46869 2364 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2365 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2366 Vcharset_list = Qnil;
2367
6a9c90ec
KH
2368 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2369 doc: /* ISO639 language mnemonic symbol for the current language environment.
2370If the current language environment is for multiple languages (e.g. "Latin-1"),
2371the value may be a list of mnemonics. */);
2372 Vcurrent_iso639_language = Qnil;
2373
2fe1edd1
KH
2374 charset_ascii
2375 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2376 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2377 charset_iso_8859_1
2378 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2379 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2380 charset_unicode
73fbf2d9 2381 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2382 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2383 charset_emacs
2384 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2385 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2386 charset_eight_bit
2387 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2388 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2389 MAX_5_BYTE_CHAR + 1);
4ed46869
KH
2390}
2391
2392#endif /* emacs */
cefd8c4f
KH
2393
2394/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2395 (do not change this comment) */