Convert (most) functions in src to standard C.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2 1/* Basic character set support.
c8f94403 2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
114f9c96 3 2008, 2009, 2010 Free Software Foundation, Inc.
7976eda0 4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
114f9c96 5 2005, 2006, 2007, 2008, 2009, 2010
ce03bf76
KH
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
4ed46869 8
327719ee 9 Copyright (C) 2003, 2004
3263d5a2
KH
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
4ed46869 12
369314dc
KH
13This file is part of GNU Emacs.
14
9ec0b715 15GNU Emacs is free software: you can redistribute it and/or modify
369314dc 16it under the terms of the GNU General Public License as published by
9ec0b715
GM
17the Free Software Foundation, either version 3 of the License, or
18(at your option) any later version.
4ed46869 19
369314dc
KH
20GNU Emacs is distributed in the hope that it will be useful,
21but WITHOUT ANY WARRANTY; without even the implied warranty of
22MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23GNU General Public License for more details.
4ed46869 24
369314dc 25You should have received a copy of the GNU General Public License
9ec0b715 26along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
4ed46869 27
68c45bf0 28#include <config.h>
68c45bf0 29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869 33#include <sys/types.h>
d7306fe6 34#include <setjmp.h>
4ed46869 35#include "lisp.h"
3263d5a2 36#include "character.h"
4ed46869
KH
37#include "charset.h"
38#include "coding.h"
fc6b09bf 39#include "disptab.h"
3263d5a2 40#include "buffer.h"
4ed46869 41
04c2f2c5 42/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 43
3263d5a2 44 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 45 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
4ed46869 49
3263d5a2
KH
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
04c2f2c5 53 charset_table as a struct charset.
4ed46869 54
3263d5a2 55*/
4ed46869 56
3263d5a2
KH
57/* List of all charsets. This variable is used only from Emacs
58 Lisp. */
4ed46869 59Lisp_Object Vcharset_list;
4ed46869 60
3263d5a2
KH
61/* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63Lisp_Object Vcharset_hash_table;
4ed46869 64
3263d5a2
KH
65/* Table of struct charset. */
66struct charset *charset_table;
4ed46869 67
3263d5a2 68static int charset_table_size;
5af5dd92 69static int charset_table_used;
4ed46869 70
3263d5a2 71Lisp_Object Qcharsetp;
4ed46869 72
3263d5a2
KH
73/* Special charset symbols. */
74Lisp_Object Qascii;
2fe1edd1 75Lisp_Object Qeight_bit;
3263d5a2
KH
76Lisp_Object Qiso_8859_1;
77Lisp_Object Qunicode;
6c652beb 78Lisp_Object Qemacs;
4ed46869 79
3263d5a2
KH
80/* The corresponding charsets. */
81int charset_ascii;
2fe1edd1 82int charset_eight_bit;
3263d5a2
KH
83int charset_iso_8859_1;
84int charset_unicode;
6c652beb 85int charset_emacs;
b0e3cf2b 86
7c7dceee
KH
87/* The other special charsets. */
88int charset_jisx0201_roman;
89int charset_jisx0208_1978;
90int charset_jisx0208;
d32320c4 91int charset_ksc5601;
c1a08b4c 92
3263d5a2
KH
93/* Value of charset attribute `charset-iso-plane'. */
94Lisp_Object Qgl, Qgr;
c1a08b4c 95
d1a04588
KH
96/* Charset of unibyte characters. */
97int charset_unibyte;
4ed46869 98
3263d5a2
KH
99/* List of charsets ordered by the priority. */
100Lisp_Object Vcharset_ordered_list;
4ed46869 101
6a9c90ec
KH
102/* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104Lisp_Object Vcharset_non_preferred_head;
105
dbbb237d 106/* Incremented everytime we change Vcharset_ordered_list. This is
64165ae2 107 unsigned short so that it fits in Lisp_Int and never matches
dbbb237d
KH
108 -1. */
109unsigned short charset_ordered_list_tick;
4ed46869 110
3263d5a2
KH
111/* List of iso-2022 charsets. */
112Lisp_Object Viso_2022_charset_list;
35e623fb 113
3263d5a2
KH
114/* List of emacs-mule charsets. */
115Lisp_Object Vemacs_mule_charset_list;
116
117struct charset *emacs_mule_charset[256];
4ed46869
KH
118
119/* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
121int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
4beef065 123Lisp_Object Vcharset_map_path;
3263d5a2 124
ecca2aad
KH
125/* If nonzero, don't load charset maps. */
126int inhibit_load_charset_map;
3263d5a2 127
6a9c90ec
KH
128Lisp_Object Vcurrent_iso639_language;
129
64165ae2
DL
130/* Defined in chartab.c */
131extern void
f57e2426
J
132map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to);
64165ae2 136
69f8de5b
KH
137#define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
820ee249
KH
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
3263d5a2
KH
152 : -1)
153
154
155/* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
820ee249
KH
158#define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
4cf9710d 171
ecca2aad
KH
172/* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
8a73a704 174
ecca2aad
KH
175static struct
176{
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205} *temp_charset_work;
206
207#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217#define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
046b1f03 229\f
93bcb785 230
e9ce014c
KH
231/* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
3263d5a2 233int charset_map_loaded;
4ed46869 234
e9ce014c 235struct charset_map_entries
4ed46869 236{
e9ce014c
KH
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242};
243
ecca2aad
KH
244/* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
4cf9710d 253
ecca2aad
KH
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
8a73a704 259
ecca2aad 260 If CONTROL_FLAG is 2, setup the following tables.
93bcb785 261
ecca2aad
KH
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267*/
4ed46869 268
3263d5a2 269static void
971de7fb 270load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
4ed46869 271{
3263d5a2 272 Lisp_Object vec, table;
3263d5a2
KH
273 unsigned max_code = CHARSET_MAX_CODE (charset);
274 int ascii_compatible_p = charset->ascii_compatible_p;
275 int min_char, max_char, nonascii_min_char;
3263d5a2 276 int i;
3263d5a2 277 unsigned char *fast_map = charset->fast_map;
99529c2c 278
e9ce014c
KH
279 if (n_entries <= 0)
280 return;
281
ecca2aad 282 if (control_flag)
8ac5a9cc 283 {
ecca2aad
KH
284 if (! inhibit_load_charset_map)
285 {
286 if (control_flag == 1)
287 {
288 if (charset->method == CHARSET_METHOD_MAP)
289 {
290 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
6662e69b 291
ecca2aad
KH
292 vec = CHARSET_DECODER (charset)
293 = Fmake_vector (make_number (n), make_number (-1));
294 }
295 else
296 {
297 char_table_set_range (Vchar_unify_table,
298 charset->min_char, charset->max_char,
299 Qnil);
300 }
301 }
302 else
303 {
304 table = Fmake_char_table (Qnil, Qnil);
305 if (charset->method == CHARSET_METHOD_MAP)
306 CHARSET_ENCODER (charset) = table;
307 else
308 CHARSET_DEUNIFIER (charset) = table;
309 }
310 }
311 else
312 {
313 if (! temp_charset_work)
314 temp_charset_work = malloc (sizeof (*temp_charset_work));
315 if (control_flag == 1)
316 {
317 memset (temp_charset_work->table.decoder, -1,
318 sizeof (int) * 0x10000);
ecca2aad
KH
319 }
320 else
321 {
322 memset (temp_charset_work->table.encoder, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work->zero_index_char = -1;
325 }
326 temp_charset_work->current = charset;
327 temp_charset_work->for_encoder = (control_flag == 2);
328 control_flag += 2;
329 }
3263d5a2 330 charset_map_loaded = 1;
2e344af3 331 }
6662e69b 332
e9ce014c 333 min_char = max_char = entries->entry[0].c;
3263d5a2 334 nonascii_min_char = MAX_CHAR;
e9ce014c 335 for (i = 0; i < n_entries; i++)
2e344af3 336 {
e9ce014c 337 unsigned from, to;
3b4f4446
KH
338 int from_index, to_index;
339 int from_c, to_c;
e9ce014c 340 int idx = i % 0x10000;
3263d5a2 341
e9ce014c
KH
342 if (i > 0 && idx == 0)
343 entries = entries->next;
344 from = entries->entry[idx].from;
345 to = entries->entry[idx].to;
3b4f4446
KH
346 from_c = entries->entry[idx].c;
347 from_index = CODE_POINT_TO_INDEX (charset, from);
348 if (from == to)
6662e69b 349 {
3b4f4446
KH
350 to_index = from_index;
351 to_c = from_c;
6662e69b 352 }
3b4f4446 353 else
6662e69b 354 {
3b4f4446
KH
355 to_index = CODE_POINT_TO_INDEX (charset, to);
356 to_c = from_c + (to_index - from_index);
6662e69b 357 }
3b4f4446
KH
358 if (from_index < 0 || to_index < 0)
359 continue;
3263d5a2 360
ecca2aad
KH
361 if (to_c > max_char)
362 max_char = to_c;
363 else if (from_c < min_char)
364 min_char = from_c;
6662e69b 365
ecca2aad
KH
366 if (control_flag == 1)
367 {
368 if (charset->method == CHARSET_METHOD_MAP)
369 for (; from_index <= to_index; from_index++, from_c++)
370 ASET (vec, from_index, make_number (from_c));
371 else
372 for (; from_index <= to_index; from_index++, from_c++)
373 CHAR_TABLE_SET (Vchar_unify_table,
374 CHARSET_CODE_OFFSET (charset) + from_index,
375 make_number (from_c));
376 }
377 else if (control_flag == 2)
378 {
379 if (charset->method == CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset))
381 for (; from_index <= to_index; from_index++, from_c++)
382 {
383 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
384
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (code));
387 }
388 else
389 for (; from_index <= to_index; from_index++, from_c++)
390 {
391 if (NILP (CHAR_TABLE_REF (table, from_c)))
392 CHAR_TABLE_SET (table, from_c, make_number (from_index));
393 }
394 }
395 else if (control_flag == 3)
396 for (; from_index <= to_index; from_index++, from_c++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
398 else if (control_flag == 4)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
401 else /* control_flag == 0 */
402 {
3b4f4446
KH
403 if (ascii_compatible_p)
404 {
405 if (! ASCII_BYTE_P (from_c))
406 {
407 if (from_c < nonascii_min_char)
408 nonascii_min_char = from_c;
409 }
410 else if (! ASCII_BYTE_P (to_c))
411 {
412 nonascii_min_char = 0x80;
413 }
414 }
177c0ea7 415
ecca2aad
KH
416 for (; from_c <= to_c; from_c++)
417 CHARSET_FAST_MAP_SET (from_c, fast_map);
2e344af3 418 }
8ac5a9cc 419 }
3263d5a2 420
ecca2aad 421 if (control_flag == 0)
4ed46869 422 {
3263d5a2
KH
423 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
424 ? nonascii_min_char : min_char);
425 CHARSET_MAX_CHAR (charset) = max_char;
4ed46869 426 }
ecca2aad
KH
427 else if (control_flag == 4)
428 {
429 temp_charset_work->min_char = min_char;
430 temp_charset_work->max_char = max_char;
431 }
4ed46869
KH
432}
433
12bcae05 434
3263d5a2
KH
435/* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
12bcae05 437
3263d5a2 438static INLINE unsigned
971de7fb 439read_hex (FILE *fp, int *eof)
12bcae05 440{
3263d5a2
KH
441 int c;
442 unsigned n;
12bcae05 443
3263d5a2
KH
444 while ((c = getc (fp)) != EOF)
445 {
69f8de5b 446 if (c == '#')
3263d5a2
KH
447 {
448 while ((c = getc (fp)) != EOF && c != '\n');
449 }
450 else if (c == '0')
451 {
452 if ((c = getc (fp)) == EOF || c == 'x')
453 break;
454 }
8f924df7 455 }
3263d5a2
KH
456 if (c == EOF)
457 {
458 *eof = 1;
459 return 0;
460 }
461 *eof = 0;
462 n = 0;
463 if (c == 'x')
464 while ((c = getc (fp)) != EOF && isxdigit (c))
465 n = ((n << 4)
466 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
467 else
468 while ((c = getc (fp)) != EOF && isdigit (c))
469 n = (n * 10) + c - '0';
e9ce014c
KH
470 if (c != EOF)
471 ungetc (c, fp);
3263d5a2
KH
472 return n;
473}
12bcae05 474
dde2559c 475extern Lisp_Object Qfile_name_handler_alist;
537efd8d 476
3263d5a2 477/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
478 Each line of MAPFILE has this form
479 0xAAAA 0xCCCC
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
482 0xAAAA-0xBBBB 0xCCCC
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
4ed46869 485
3263d5a2
KH
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c 488 where CODE1 is a code-point or a cons of code-points specifying a
dde2559c
KH
489 range.
490
18a10a21
JB
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
4ed46869 493
f57e2426 494extern void add_to_log (char *, Lisp_Object, Lisp_Object);
4ed46869 495
e9ce014c 496static void
971de7fb 497load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
4ed46869 498{
e9ce014c
KH
499 unsigned min_code = CHARSET_MIN_CODE (charset);
500 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
501 int fd;
502 FILE *fp;
3263d5a2
KH
503 int eof;
504 Lisp_Object suffixes;
e9ce014c 505 struct charset_map_entries *head, *entries;
a2f3eb19
CY
506 int n_entries, count;
507 USE_SAFE_ALLOCA;
4ed46869 508
3263d5a2
KH
509 suffixes = Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil));
4ed46869 511
a2f3eb19 512 count = SPECPDL_INDEX ();
dde2559c 513 specbind (Qfile_name_handler_alist, Qnil);
4beef065 514 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
dde2559c 515 unbind_to (count, Qnil);
3263d5a2
KH
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
8458d4c1 518 error ("Failure in loading charset map: %S", SDATA (mapfile));
4ed46869 519
a2f3eb19
CY
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head, struct charset_map_entries *,
523 sizeof (struct charset_map_entries));
524 entries = head;
412c01b6 525 bzero (entries, sizeof (struct charset_map_entries));
a2f3eb19 526
e9ce014c 527 n_entries = 0;
3263d5a2
KH
528 eof = 0;
529 while (1)
530 {
e9ce014c
KH
531 unsigned from, to;
532 int c;
533 int idx;
4ed46869 534
e9ce014c 535 from = read_hex (fp, &eof);
3263d5a2
KH
536 if (eof)
537 break;
e9ce014c
KH
538 if (getc (fp) == '-')
539 to = read_hex (fp, &eof);
540 else
541 to = from;
542 c = (int) read_hex (fp, &eof);
ac4137cc 543
e9ce014c
KH
544 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
545 continue;
ac4137cc 546
e9ce014c 547 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 548 {
a2f3eb19
CY
549 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
550 sizeof (struct charset_map_entries));
e9ce014c 551 entries = entries->next;
412c01b6 552 bzero (entries, sizeof (struct charset_map_entries));
3263d5a2 553 }
e9ce014c
KH
554 idx = n_entries % 0x10000;
555 entries->entry[idx].from = from;
556 entries->entry[idx].to = to;
557 entries->entry[idx].c = c;
558 n_entries++;
3263d5a2
KH
559 }
560 fclose (fp);
177c0ea7 561
e9ce014c 562 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 563 SAFE_FREE ();
4ed46869
KH
564}
565
e9ce014c 566static void
971de7fb 567load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
23d2a7f1 568{
e9ce014c
KH
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
a2f3eb19 575 USE_SAFE_ALLOCA;
23d2a7f1 576
e9ce014c 577 if (len % 2 == 1)
3263d5a2 578 {
e9ce014c
KH
579 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
580 return;
3263d5a2 581 }
35e623fb 582
a2f3eb19
CY
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head, struct charset_map_entries *,
586 sizeof (struct charset_map_entries));
587 entries = head;
412c01b6 588 bzero (entries, sizeof (struct charset_map_entries));
a2f3eb19 589
e9ce014c
KH
590 n_entries = 0;
591 for (i = 0; i < len; i += 2)
35e623fb 592 {
e9ce014c
KH
593 Lisp_Object val, val2;
594 unsigned from, to;
595 int c;
596 int idx;
d2665018 597
e9ce014c
KH
598 val = AREF (vec, i);
599 if (CONSP (val))
bbf12bb3 600 {
e9ce014c
KH
601 val2 = XCDR (val);
602 val = XCAR (val);
603 CHECK_NATNUM (val);
604 CHECK_NATNUM (val2);
605 from = XFASTINT (val);
606 to = XFASTINT (val2);
bbf12bb3 607 }
e9ce014c 608 else
bbf12bb3 609 {
e9ce014c
KH
610 CHECK_NATNUM (val);
611 from = to = XFASTINT (val);
bbf12bb3 612 }
e9ce014c
KH
613 val = AREF (vec, i + 1);
614 CHECK_NATNUM (val);
615 c = XFASTINT (val);
76d7b829 616
e9ce014c
KH
617 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
618 continue;
76d7b829 619
dcc694d7 620 if (n_entries > 0 && (n_entries % 0x10000) == 0)
e9ce014c 621 {
a2f3eb19
CY
622 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
623 sizeof (struct charset_map_entries));
e9ce014c 624 entries = entries->next;
412c01b6 625 bzero (entries, sizeof (struct charset_map_entries));
e9ce014c
KH
626 }
627 idx = n_entries % 0x10000;
628 entries->entry[idx].from = from;
629 entries->entry[idx].to = to;
630 entries->entry[idx].c = c;
631 n_entries++;
632 }
76d7b829 633
e9ce014c 634 load_charset_map (charset, head, n_entries, control_flag);
a2f3eb19 635 SAFE_FREE ();
ac4137cc
KH
636}
637
ecca2aad
KH
638
639/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
641
3263d5a2 642static void
971de7fb 643load_charset (struct charset *charset, int control_flag)
76d7b829 644{
ecca2aad 645 Lisp_Object map;
76d7b829 646
ecca2aad
KH
647 if (inhibit_load_charset_map
648 && temp_charset_work
649 && charset == temp_charset_work->current
56f00ed2 650 && ((control_flag == 2) == temp_charset_work->for_encoder))
ecca2aad
KH
651 return;
652
653 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
654 map = CHARSET_MAP (charset);
655 else if (CHARSET_UNIFIED_P (charset))
656 map = CHARSET_UNIFY_MAP (charset);
657 if (STRINGP (map))
658 load_charset_map_from_file (charset, map, control_flag);
659 else
660 load_charset_map_from_vector (charset, map, control_flag);
4ed46869 661}
76d7b829 662
3263d5a2
KH
663
664DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
665 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
666 (object)
667 Lisp_Object object;
23d2a7f1 668{
3263d5a2 669 return (CHARSETP (object) ? Qt : Qnil);
76d7b829
KH
670}
671
4ed46869 672
f57e2426
J
673void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
674 Lisp_Object function, Lisp_Object arg,
675 unsigned from, unsigned to);
ecca2aad
KH
676
677void
971de7fb 678map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
ecca2aad
KH
679{
680 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
681 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
682 Lisp_Object range;
683 int c, stop;
684 struct gcpro gcpro1;
685
686 range = Fcons (Qnil, Qnil);
687 GCPRO1 (range);
688
689 c = temp_charset_work->min_char;
690 stop = (temp_charset_work->max_char < 0x20000
691 ? temp_charset_work->max_char : 0xFFFF);
692
693 while (1)
694 {
695 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
696
697 if (index >= from_idx && index <= to_idx)
698 {
699 if (NILP (XCAR (range)))
700 XSETCAR (range, make_number (c));
701 }
702 else if (! NILP (XCAR (range)))
703 {
704 XSETCDR (range, make_number (c - 1));
705 if (c_function)
706 (*c_function) (arg, range);
707 else
708 call2 (function, range, arg);
709 XSETCAR (range, Qnil);
710 }
711 if (c == stop)
712 {
713 if (c == temp_charset_work->max_char)
714 {
715 if (! NILP (XCAR (range)))
716 {
717 XSETCDR (range, make_number (c));
718 if (c_function)
719 (*c_function) (arg, range);
720 else
721 call2 (function, range, arg);
722 }
723 break;
724 }
725 c = 0x1FFFF;
726 stop = temp_charset_work->max_char;
727 }
728 c++;
729 }
c542407d 730 UNGCPRO;
ecca2aad
KH
731}
732
4ed46869 733void
374c5cfd
KH
734map_charset_chars (c_function, function, arg,
735 charset, from, to)
f57e2426 736 void (*c_function) (Lisp_Object, Lisp_Object);
374c5cfd
KH
737 Lisp_Object function, arg;
738 struct charset *charset;
739 unsigned from, to;
4ed46869 740{
3263d5a2 741 Lisp_Object range;
374c5cfd 742 int partial;
3263d5a2 743
374c5cfd
KH
744 partial = (from > CHARSET_MIN_CODE (charset)
745 || to < CHARSET_MAX_CODE (charset));
746
3263d5a2 747 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
4ed46869 748 {
374c5cfd
KH
749 int from_idx = CODE_POINT_TO_INDEX (charset, from);
750 int to_idx = CODE_POINT_TO_INDEX (charset, to);
751 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
752 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
753
ecca2aad
KH
754 if (CHARSET_UNIFIED_P (charset))
755 {
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
757 load_charset (charset, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
759 map_char_table_for_charset (c_function, function,
760 CHARSET_DEUNIFIER (charset), arg,
761 partial ? charset : NULL, from, to);
762 else
763 map_charset_for_dump (c_function, function, arg, from, to);
764 }
765
374c5cfd 766 range = Fcons (make_number (from_c), make_number (to_c));
3263d5a2 767 if (NILP (function))
5af5dd92 768 (*c_function) (arg, range);
3263d5a2
KH
769 else
770 call2 (function, range, arg);
c83ef371 771 }
3263d5a2
KH
772 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
773 {
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
ecca2aad
KH
775 load_charset (charset, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
777 map_char_table_for_charset (c_function, function,
778 CHARSET_ENCODER (charset), arg,
779 partial ? charset : NULL, from, to);
780 else
781 map_charset_for_dump (c_function, function, arg, from, to);
3263d5a2 782 }
374c5cfd 783 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
4ed46869 784 {
374c5cfd
KH
785 Lisp_Object subset_info;
786 int offset;
787
788 subset_info = CHARSET_SUBSET (charset);
789 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
790 offset = XINT (AREF (subset_info, 3));
791 from -= offset;
792 if (from < XFASTINT (AREF (subset_info, 1)))
793 from = XFASTINT (AREF (subset_info, 1));
794 to -= offset;
795 if (to > XFASTINT (AREF (subset_info, 2)))
796 to = XFASTINT (AREF (subset_info, 2));
797 map_charset_chars (c_function, function, arg, charset, from, to);
4ed46869 798 }
374c5cfd
KH
799 else /* i.e. CHARSET_METHOD_SUPERSET */
800 {
801 Lisp_Object parents;
4ed46869 802
374c5cfd
KH
803 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
804 parents = XCDR (parents))
bbf12bb3 805 {
374c5cfd
KH
806 int offset;
807 unsigned this_from, this_to;
808
809 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
810 offset = XINT (XCDR (XCAR (parents)));
12b55765
KH
811 this_from = from > offset ? from - offset : 0;
812 this_to = to > offset ? to - offset : 0;
374c5cfd
KH
813 if (this_from < CHARSET_MIN_CODE (charset))
814 this_from = CHARSET_MIN_CODE (charset);
815 if (this_to > CHARSET_MAX_CODE (charset))
816 this_to = CHARSET_MAX_CODE (charset);
111daccf
KH
817 map_charset_chars (c_function, function, arg, charset,
818 this_from, this_to);
bbf12bb3 819 }
35e623fb 820 }
4ed46869
KH
821}
822
374c5cfd 823DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
04c2f2c5 824 doc: /* Call FUNCTION for all characters in CHARSET.
374c5cfd 825FUNCTION is called with an argument RANGE and the optional 3rd
3263d5a2 826argument ARG.
4ed46869 827
374c5cfd
KH
828RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829characters contained in CHARSET.
4ed46869 830
374c5cfd 831The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
72d51285 832range of code points (in CHARSET) of target characters. */)
374c5cfd
KH
833 (function, charset, arg, from_code, to_code)
834 Lisp_Object function, charset, arg, from_code, to_code;
4ed46869 835{
374c5cfd 836 struct charset *cs;
16fed1fc 837 unsigned from, to;
4ed46869 838
374c5cfd
KH
839 CHECK_CHARSET_GET_CHARSET (charset, cs);
840 if (NILP (from_code))
16fed1fc 841 from = CHARSET_MIN_CODE (cs);
970b7474 842 else
4ed46869 843 {
970b7474
KH
844 CHECK_NATNUM (from_code);
845 from = XINT (from_code);
846 if (from < CHARSET_MIN_CODE (cs))
847 from = CHARSET_MIN_CODE (cs);
4ed46869 848 }
374c5cfd 849 if (NILP (to_code))
970b7474 850 to = CHARSET_MAX_CODE (cs);
4ed46869
KH
851 else
852 {
970b7474
KH
853 CHECK_NATNUM (to_code);
854 to = XINT (to_code);
855 if (to > CHARSET_MAX_CODE (cs))
856 to = CHARSET_MAX_CODE (cs);
4ed46869 857 }
16fed1fc 858 map_charset_chars (NULL, function, arg, cs, from, to);
3263d5a2 859 return Qnil;
35e623fb 860}
4ed46869 861
4ed46869 862
3263d5a2
KH
863/* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
866 detail. */
4ed46869 867
3263d5a2
KH
868DEFUN ("define-charset-internal", Fdefine_charset_internal,
869 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
870 doc: /* For internal use only.
871usage: (define-charset-internal ...) */)
3263d5a2
KH
872 (nargs, args)
873 int nargs;
874 Lisp_Object *args;
4ed46869 875{
3263d5a2
KH
876 /* Charset attr vector. */
877 Lisp_Object attrs;
878 Lisp_Object val;
879 unsigned hash_code;
880 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 881 int i, j;
3263d5a2
KH
882 struct charset charset;
883 int id;
884 int dimension;
885 int new_definition_p;
886 int nchars;
887
888 if (nargs != charset_arg_max)
889 return Fsignal (Qwrong_number_of_arguments,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs)));
892
893 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
894
895 CHECK_SYMBOL (args[charset_arg_name]);
896 ASET (attrs, charset_name, args[charset_arg_name]);
897
898 val = args[charset_arg_code_space];
899 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 900 {
3263d5a2
KH
901 int min_byte, max_byte;
902
903 min_byte = XINT (Faref (val, make_number (i * 2)));
904 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
905 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
906 error ("Invalid :code-space value");
907 charset.code_space[i * 4] = min_byte;
908 charset.code_space[i * 4 + 1] = max_byte;
909 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
910 nchars *= charset.code_space[i * 4 + 2];
911 charset.code_space[i * 4 + 3] = nchars;
912 if (max_byte > 0)
913 dimension = i + 1;
914 }
4ed46869 915
3263d5a2
KH
916 val = args[charset_arg_dimension];
917 if (NILP (val))
918 charset.dimension = dimension;
919 else
4ed46869 920 {
3263d5a2
KH
921 CHECK_NATNUM (val);
922 charset.dimension = XINT (val);
923 if (charset.dimension < 1 || charset.dimension > 4)
924 args_out_of_range_3 (val, make_number (1), make_number (4));
4ed46869
KH
925 }
926
3263d5a2
KH
927 charset.code_linear_p
928 = (charset.dimension == 1
929 || (charset.code_space[2] == 256
930 && (charset.dimension == 2
931 || (charset.code_space[6] == 256
932 && (charset.dimension == 3
933 || charset.code_space[10] == 256)))));
934
69f8de5b 935 if (! charset.code_linear_p)
4ed46869 936 {
69f8de5b 937 charset.code_space_mask = (unsigned char *) xmalloc (256);
33df3183 938 bzero (charset.code_space_mask, 256);
69f8de5b
KH
939 for (i = 0; i < 4; i++)
940 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
941 j++)
942 charset.code_space_mask[j] |= (1 << i);
4ed46869
KH
943 }
944
3263d5a2 945 charset.iso_chars_96 = charset.code_space[2] == 96;
4ed46869 946
3263d5a2
KH
947 charset.min_code = (charset.code_space[0]
948 | (charset.code_space[4] << 8)
949 | (charset.code_space[8] << 16)
950 | (charset.code_space[12] << 24));
951 charset.max_code = (charset.code_space[1]
952 | (charset.code_space[5] << 8)
953 | (charset.code_space[9] << 16)
954 | (charset.code_space[13] << 24));
820ee249 955 charset.char_index_offset = 0;
8a73a704 956
820ee249
KH
957 val = args[charset_arg_min_code];
958 if (! NILP (val))
959 {
960 unsigned code;
fdb82f93 961
820ee249
KH
962 if (INTEGERP (val))
963 code = XINT (val);
964 else
965 {
966 CHECK_CONS (val);
8f924df7
KH
967 CHECK_NUMBER_CAR (val);
968 CHECK_NUMBER_CDR (val);
820ee249
KH
969 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
970 }
971 if (code < charset.min_code
972 || code > charset.max_code)
973 args_out_of_range_3 (make_number (charset.min_code),
974 make_number (charset.max_code), val);
975 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
976 charset.min_code = code;
977 }
3fac5a51 978
820ee249
KH
979 val = args[charset_arg_max_code];
980 if (! NILP (val))
3fac5a51 981 {
820ee249
KH
982 unsigned code;
983
984 if (INTEGERP (val))
985 code = XINT (val);
986 else
987 {
988 CHECK_CONS (val);
8f924df7
KH
989 CHECK_NUMBER_CAR (val);
990 CHECK_NUMBER_CDR (val);
820ee249
KH
991 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
992 }
993 if (code < charset.min_code
994 || code > charset.max_code)
995 args_out_of_range_3 (make_number (charset.min_code),
996 make_number (charset.max_code), val);
997 charset.max_code = code;
3fac5a51 998 }
3fac5a51 999
ecca2aad 1000 charset.compact_codes_p = charset.max_code < 0x10000;
4ed46869 1001
3263d5a2
KH
1002 val = args[charset_arg_invalid_code];
1003 if (NILP (val))
1004 {
1005 if (charset.min_code > 0)
1006 charset.invalid_code = 0;
bbf12bb3
KH
1007 else
1008 {
3263d5a2
KH
1009 XSETINT (val, charset.max_code + 1);
1010 if (XINT (val) == charset.max_code + 1)
1011 charset.invalid_code = charset.max_code + 1;
1012 else
1013 error ("Attribute :invalid-code must be specified");
76d7b829 1014 }
76d7b829 1015 }
3263d5a2
KH
1016 else
1017 {
1018 CHECK_NATNUM (val);
1019 charset.invalid_code = XFASTINT (val);
1020 }
4ed46869 1021
3263d5a2
KH
1022 val = args[charset_arg_iso_final];
1023 if (NILP (val))
1024 charset.iso_final = -1;
1025 else
1026 {
1027 CHECK_NUMBER (val);
1028 if (XINT (val) < '0' || XINT (val) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val));
1030 charset.iso_final = XINT (val);
1031 }
4ed46869 1032
3263d5a2
KH
1033 val = args[charset_arg_iso_revision];
1034 if (NILP (val))
1035 charset.iso_revision = -1;
1036 else
4ed46869 1037 {
3263d5a2
KH
1038 CHECK_NUMBER (val);
1039 if (XINT (val) > 63)
1040 args_out_of_range (make_number (63), val);
1041 charset.iso_revision = XINT (val);
4ed46869 1042 }
4ed46869 1043
3263d5a2
KH
1044 val = args[charset_arg_emacs_mule_id];
1045 if (NILP (val))
1046 charset.emacs_mule_id = -1;
4ed46869
KH
1047 else
1048 {
3263d5a2
KH
1049 CHECK_NATNUM (val);
1050 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val));
1052 charset.emacs_mule_id = XINT (val);
c83ef371 1053 }
f6302ac9 1054
3263d5a2 1055 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1d67c29b 1056
3263d5a2 1057 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
4ed46869 1058
3263d5a2
KH
1059 charset.unified_p = 0;
1060
1061 bzero (charset.fast_map, sizeof (charset.fast_map));
1062
1063 if (! NILP (args[charset_arg_code_offset]))
1064 {
1065 val = args[charset_arg_code_offset];
1066 CHECK_NUMBER (val);
1067
1068 charset.method = CHARSET_METHOD_OFFSET;
1069 charset.code_offset = XINT (val);
1070
1071 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1072 charset.min_char = i + charset.code_offset;
1073 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1074 charset.max_char = i + charset.code_offset;
1075 if (charset.max_char > MAX_CHAR)
1076 error ("Unsupported max char: %d", charset.max_char);
1077
f148205f
KH
1078 i = (charset.min_char >> 7) << 7;
1079 for (; i < 0x10000 && i <= charset.max_char; i += 128)
3263d5a2 1080 CHARSET_FAST_MAP_SET (i, charset.fast_map);
f148205f 1081 i = (i >> 12) << 12;
3263d5a2
KH
1082 for (; i <= charset.max_char; i += 0x1000)
1083 CHARSET_FAST_MAP_SET (i, charset.fast_map);
3620330b
KH
1084 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1085 charset.ascii_compatible_p = 1;
3263d5a2
KH
1086 }
1087 else if (! NILP (args[charset_arg_map]))
1088 {
1089 val = args[charset_arg_map];
1090 ASET (attrs, charset_map, val);
ecca2aad 1091 charset.method = CHARSET_METHOD_MAP;
3263d5a2 1092 }
374c5cfd 1093 else if (! NILP (args[charset_arg_subset]))
3263d5a2 1094 {
374c5cfd
KH
1095 Lisp_Object parent;
1096 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1097 struct charset *parent_charset;
1098
1099 val = args[charset_arg_subset];
1100 parent = Fcar (val);
1101 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1102 parent_min_code = Fnth (make_number (1), val);
1103 CHECK_NATNUM (parent_min_code);
1104 parent_max_code = Fnth (make_number (2), val);
1105 CHECK_NATNUM (parent_max_code);
1106 parent_code_offset = Fnth (make_number (3), val);
1107 CHECK_NUMBER (parent_code_offset);
1108 val = Fmake_vector (make_number (4), Qnil);
1109 ASET (val, 0, make_number (parent_charset->id));
1110 ASET (val, 1, parent_min_code);
1111 ASET (val, 2, parent_max_code);
1112 ASET (val, 3, parent_code_offset);
1113 ASET (attrs, charset_subset, val);
1114
1115 charset.method = CHARSET_METHOD_SUBSET;
1116 /* Here, we just copy the parent's fast_map. It's not accurate,
1117 but at least it works for quickly detecting which character
1118 DOESN'T belong to this charset. */
1119 for (i = 0; i < 190; i++)
1120 charset.fast_map[i] = parent_charset->fast_map[i];
1121
1122 /* We also copy these for parents. */
1123 charset.min_char = parent_charset->min_char;
1124 charset.max_char = parent_charset->max_char;
1125 }
1126 else if (! NILP (args[charset_arg_superset]))
0282eb69 1127 {
374c5cfd
KH
1128 val = args[charset_arg_superset];
1129 charset.method = CHARSET_METHOD_SUPERSET;
3263d5a2 1130 val = Fcopy_sequence (val);
374c5cfd 1131 ASET (attrs, charset_superset, val);
3263d5a2
KH
1132
1133 charset.min_char = MAX_CHAR;
1134 charset.max_char = 0;
1135 for (; ! NILP (val); val = Fcdr (val))
0282eb69 1136 {
3263d5a2
KH
1137 Lisp_Object elt, car_part, cdr_part;
1138 int this_id, offset;
1139 struct charset *this_charset;
2e344af3 1140
3263d5a2
KH
1141 elt = Fcar (val);
1142 if (CONSP (elt))
2e344af3 1143 {
3263d5a2
KH
1144 car_part = XCAR (elt);
1145 cdr_part = XCDR (elt);
1146 CHECK_CHARSET_GET_ID (car_part, this_id);
1147 CHECK_NUMBER (cdr_part);
1148 offset = XINT (cdr_part);
177c0ea7 1149 }
3263d5a2 1150 else
4ed46869 1151 {
3263d5a2
KH
1152 CHECK_CHARSET_GET_ID (elt, this_id);
1153 offset = 0;
4ed46869 1154 }
3263d5a2
KH
1155 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1156
1157 this_charset = CHARSET_FROM_ID (this_id);
1158 if (charset.min_char > this_charset->min_char)
1159 charset.min_char = this_charset->min_char;
1160 if (charset.max_char < this_charset->max_char)
1161 charset.max_char = this_charset->max_char;
1162 for (i = 0; i < 190; i++)
1163 charset.fast_map[i] |= this_charset->fast_map[i];
0282eb69 1164 }
0282eb69 1165 }
2e344af3 1166 else
3263d5a2 1167 error ("None of :code-offset, :map, :parents are specified");
05505664 1168
3263d5a2
KH
1169 val = args[charset_arg_unify_map];
1170 if (! NILP (val) && !STRINGP (val))
1171 CHECK_VECTOR (val);
1172 ASET (attrs, charset_unify_map, val);
05505664 1173
3263d5a2
KH
1174 CHECK_LIST (args[charset_arg_plist]);
1175 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 1176
3263d5a2
KH
1177 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1178 &hash_code);
1179 if (charset.hash_index >= 0)
1180 {
1181 new_definition_p = 0;
4f65af01 1182 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
1183 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1184 }
1a45ff10 1185 else
3263d5a2
KH
1186 {
1187 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1188 hash_code);
1189 if (charset_table_used == charset_table_size)
1190 {
2fe1edd1
KH
1191 struct charset *new_table
1192 = (struct charset *) xmalloc (sizeof (struct charset)
1193 * (charset_table_size + 16));
1194 bcopy (charset_table, new_table,
1195 sizeof (struct charset) * charset_table_size);
1196 charset_table_size += 16;
1197 charset_table = new_table;
3263d5a2
KH
1198 }
1199 id = charset_table_used++;
3263d5a2
KH
1200 new_definition_p = 1;
1201 }
2e344af3 1202
4f65af01 1203 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
1204 charset.id = id;
1205 charset_table[id] = charset;
2e344af3 1206
ecca2aad 1207 if (charset.method == CHARSET_METHOD_MAP)
b8ebe9dd
KH
1208 {
1209 load_charset (&charset, 0);
1210 charset_table[id] = charset;
1211 }
ecca2aad 1212
3263d5a2 1213 if (charset.iso_final >= 0)
4ed46869 1214 {
3263d5a2
KH
1215 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1216 charset.iso_final) = id;
1217 if (new_definition_p)
1218 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1219 Fcons (make_number (id), Qnil));
7c7dceee
KH
1220 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1221 charset_jisx0201_roman = id;
1222 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1223 charset_jisx0208_1978 = id;
1224 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1225 charset_jisx0208 = id;
d32320c4
KH
1226 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1227 charset_ksc5601 = id;
4ed46869 1228 }
d0cf2d48 1229
3263d5a2 1230 if (charset.emacs_mule_id >= 0)
4ed46869 1231 {
3263d5a2 1232 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
1233 if (charset.emacs_mule_id < 0xA0)
1234 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3b1ae89b
KH
1235 else
1236 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
3263d5a2
KH
1237 if (new_definition_p)
1238 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1239 Fcons (make_number (id), Qnil));
4ed46869
KH
1240 }
1241
3263d5a2
KH
1242 if (new_definition_p)
1243 {
1244 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
8055c66a
KH
1245 if (charset.supplementary_p)
1246 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1247 Fcons (make_number (id), Qnil));
1248 else
880820fe
KH
1249 {
1250 Lisp_Object tail;
1251
1252 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1253 {
1254 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1255
1256 if (cs->supplementary_p)
1257 break;
1258 }
1259 if (EQ (tail, Vcharset_ordered_list))
1260 Vcharset_ordered_list = Fcons (make_number (id),
1261 Vcharset_ordered_list);
1262 else if (NILP (tail))
1263 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1264 Fcons (make_number (id), Qnil));
1265 else
1266 {
1267 val = Fcons (XCAR (tail), XCDR (tail));
1268 XSETCDR (tail, val);
1269 XSETCAR (tail, make_number (id));
1270 }
1271 }
dbbb237d 1272 charset_ordered_list_tick++;
3263d5a2 1273 }
4ed46869 1274
3263d5a2 1275 return Qnil;
4ed46869
KH
1276}
1277
2fe1edd1
KH
1278
1279/* Same as Fdefine_charset_internal but arguments are more convenient
1280 to call from C (typically in syms_of_charset). This can define a
1281 charset of `offset' method only. Return the ID of the new
1282 charset. */
1283
1284static int
1285define_charset_internal (name, dimension, code_space, min_code, max_code,
1286 iso_final, iso_revision, emacs_mule_id,
7acf89e6 1287 ascii_compatible, supplementary,
2fe1edd1
KH
1288 code_offset)
1289 Lisp_Object name;
1290 int dimension;
1291 unsigned char *code_space;
1292 unsigned min_code, max_code;
1293 int iso_final, iso_revision, emacs_mule_id;
7acf89e6 1294 int ascii_compatible, supplementary;
2fe1edd1
KH
1295 int code_offset;
1296{
1297 Lisp_Object args[charset_arg_max];
1298 Lisp_Object plist[14];
1299 Lisp_Object val;
1300 int i;
1301
1302 args[charset_arg_name] = name;
1303 args[charset_arg_dimension] = make_number (dimension);
1304 val = Fmake_vector (make_number (8), make_number (0));
1305 for (i = 0; i < 8; i++)
1306 ASET (val, i, make_number (code_space[i]));
1307 args[charset_arg_code_space] = val;
1308 args[charset_arg_min_code] = make_number (min_code);
1309 args[charset_arg_max_code] = make_number (max_code);
1310 args[charset_arg_iso_final]
1311 = (iso_final < 0 ? Qnil : make_number (iso_final));
1312 args[charset_arg_iso_revision] = make_number (iso_revision);
1313 args[charset_arg_emacs_mule_id]
1314 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1315 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
7acf89e6 1316 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
2fe1edd1
KH
1317 args[charset_arg_invalid_code] = Qnil;
1318 args[charset_arg_code_offset] = make_number (code_offset);
1319 args[charset_arg_map] = Qnil;
1320 args[charset_arg_subset] = Qnil;
1321 args[charset_arg_superset] = Qnil;
1322 args[charset_arg_unify_map] = Qnil;
1323
d67b4f80 1324 plist[0] = intern_c_string (":name");
2fe1edd1 1325 plist[1] = args[charset_arg_name];
d67b4f80 1326 plist[2] = intern_c_string (":dimension");
2fe1edd1 1327 plist[3] = args[charset_arg_dimension];
d67b4f80 1328 plist[4] = intern_c_string (":code-space");
2fe1edd1 1329 plist[5] = args[charset_arg_code_space];
d67b4f80 1330 plist[6] = intern_c_string (":iso-final-char");
2fe1edd1 1331 plist[7] = args[charset_arg_iso_final];
d67b4f80 1332 plist[8] = intern_c_string (":emacs-mule-id");
2fe1edd1 1333 plist[9] = args[charset_arg_emacs_mule_id];
d67b4f80 1334 plist[10] = intern_c_string (":ascii-compatible-p");
2fe1edd1 1335 plist[11] = args[charset_arg_ascii_compatible_p];
d67b4f80 1336 plist[12] = intern_c_string (":code-offset");
2fe1edd1
KH
1337 plist[13] = args[charset_arg_code_offset];
1338
1339 args[charset_arg_plist] = Flist (14, plist);
1340 Fdefine_charset_internal (charset_arg_max, args);
1341
1342 return XINT (CHARSET_SYMBOL_ID (name));
1343}
1344
1345
3263d5a2
KH
1346DEFUN ("define-charset-alias", Fdefine_charset_alias,
1347 Sdefine_charset_alias, 2, 2, 0,
1348 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1349 (alias, charset)
1350 Lisp_Object alias, charset;
4ed46869 1351{
3263d5a2
KH
1352 Lisp_Object attr;
1353
1354 CHECK_CHARSET_GET_ATTR (charset, attr);
1355 Fputhash (alias, attr, Vcharset_hash_table);
528623a0 1356 Vcharset_list = Fcons (alias, Vcharset_list);
3263d5a2
KH
1357 return Qnil;
1358}
4ed46869 1359
4ed46869 1360
3263d5a2 1361DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
56a46d1d 1362 doc: /* Return the property list of CHARSET. */)
3263d5a2
KH
1363 (charset)
1364 Lisp_Object charset;
1365{
1366 Lisp_Object attrs;
1367
1368 CHECK_CHARSET_GET_ATTR (charset, attrs);
1369 return CHARSET_ATTR_PLIST (attrs);
1370}
1371
1372
1373DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1374 doc: /* Set CHARSET's property list to PLIST. */)
1375 (charset, plist)
1376 Lisp_Object charset, plist;
1377{
1378 Lisp_Object attrs;
1379
1380 CHECK_CHARSET_GET_ATTR (charset, attrs);
1381 CHARSET_ATTR_PLIST (attrs) = plist;
1382 return plist;
1383}
1384
1385
dbbb237d 1386DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
56a46d1d
DL
1387 doc: /* Unify characters of CHARSET with Unicode.
1388This means reading the relevant file and installing the table defined
dbbb237d
KH
1389by CHARSET's `:unify-map' property.
1390
64165ae2
DL
1391Optional second arg UNIFY-MAP is a file name string or a vector. It has
1392the same meaning as the `:unify-map' attribute in the function
dbbb237d
KH
1393`define-charset' (which see).
1394
1395Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1396 (charset, unify_map, deunify)
1397 Lisp_Object charset, unify_map, deunify;
8a73a704 1398{
3263d5a2
KH
1399 int id;
1400 struct charset *cs;
8f924df7 1401
3263d5a2
KH
1402 CHECK_CHARSET_GET_ID (charset, id);
1403 cs = CHARSET_FROM_ID (id);
dbbb237d
KH
1404 if (NILP (deunify)
1405 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1406 : ! CHARSET_UNIFIED_P (cs))
3263d5a2 1407 return Qnil;
dbbb237d 1408
3263d5a2 1409 CHARSET_UNIFIED_P (cs) = 0;
dbbb237d
KH
1410 if (NILP (deunify))
1411 {
ecca2aad
KH
1412 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1413 || CHARSET_CODE_OFFSET (cs) < 0x110000)
8f924df7 1414 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
dbbb237d
KH
1415 if (NILP (unify_map))
1416 unify_map = CHARSET_UNIFY_MAP (cs);
dbbb237d 1417 else
ecca2aad
KH
1418 {
1419 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1420 signal_error ("Bad unify-map", unify_map);
1421 CHARSET_UNIFY_MAP (cs) = unify_map;
1422 }
1423 if (NILP (Vchar_unify_table))
1424 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1425 char_table_set_range (Vchar_unify_table,
1426 cs->min_char, cs->max_char, charset);
dbbb237d
KH
1427 CHARSET_UNIFIED_P (cs) = 1;
1428 }
1429 else if (CHAR_TABLE_P (Vchar_unify_table))
1430 {
1431 int min_code = CHARSET_MIN_CODE (cs);
1432 int max_code = CHARSET_MAX_CODE (cs);
1433 int min_char = DECODE_CHAR (cs, min_code);
1434 int max_char = DECODE_CHAR (cs, max_code);
8f924df7 1435
dbbb237d
KH
1436 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1437 }
8f924df7 1438
3263d5a2 1439 return Qnil;
8a73a704
KH
1440}
1441
3fac5a51
KH
1442DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1443 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2 1444 doc: /*
d0cf2d48 1445Return an unused ISO final char for a charset of DIMENSION and CHARS.
fdb82f93
PJ
1446DIMENSION is the number of bytes to represent a character: 1 or 2.
1447CHARS is the number of characters in a dimension: 94 or 96.
1448
1449This final char is for private use, thus the range is `0' (48) .. `?' (63).
1721b6af 1450If there's no unused final char for the specified kind of charset,
fdb82f93
PJ
1451return nil. */)
1452 (dimension, chars)
3fac5a51
KH
1453 Lisp_Object dimension, chars;
1454{
1455 int final_char;
1456
b7826503
PJ
1457 CHECK_NUMBER (dimension);
1458 CHECK_NUMBER (chars);
3263d5a2
KH
1459 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1460 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1461 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1462 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1463 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1464 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1465 break;
3fac5a51
KH
1466 return (final_char <= '?' ? make_number (final_char) : Qnil);
1467}
1468
3263d5a2 1469static void
971de7fb 1470check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
4ed46869 1471{
3263d5a2
KH
1472 CHECK_NATNUM (dimension);
1473 CHECK_NATNUM (chars);
1474 CHECK_NATNUM (final_char);
4ed46869 1475
3263d5a2
KH
1476 if (XINT (dimension) > 3)
1477 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1478 if (XINT (chars) != 94 && XINT (chars) != 96)
1479 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1480 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1481 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1482}
1483
1484
1485DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1486 4, 4, 0,
cefd8c4f
KH
1487 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1488
1489On decoding by an ISO-2022 base coding system, when a charset
1490specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1491if CHARSET is designated instead. */)
3263d5a2
KH
1492 (dimension, chars, final_char, charset)
1493 Lisp_Object dimension, chars, final_char, charset;
1494{
1495 int id;
82215ce9 1496 int chars_flag;
4ed46869 1497
3263d5a2
KH
1498 CHECK_CHARSET_GET_ID (charset, id);
1499 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
1500 chars_flag = XINT (chars) == 96;
1501 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
4ed46869
KH
1502 return Qnil;
1503}
1504
3263d5a2 1505
2e344af3
KH
1506/* Return information about charsets in the text at PTR of NBYTES
1507 bytes, which are NCHARS characters. The value is:
f6302ac9 1508
cfe34140 1509 0: Each character is represented by one byte. This is always
3263d5a2
KH
1510 true for a unibyte string. For a multibyte string, true if
1511 it contains only ASCII characters.
1512
28c026cd
DL
1513 1: No charsets other than ascii, control-1, and latin-1 are
1514 found.
1d67c29b 1515
3263d5a2
KH
1516 2: Otherwise.
1517*/
4ed46869
KH
1518
1519int
971de7fb 1520string_xstring_p (Lisp_Object string)
4ed46869 1521{
8f924df7
KH
1522 const unsigned char *p = SDATA (string);
1523 const unsigned char *endp = p + SBYTES (string);
3263d5a2 1524
8f924df7 1525 if (SCHARS (string) == SBYTES (string))
3263d5a2
KH
1526 return 0;
1527
3263d5a2 1528 while (p < endp)
0282eb69 1529 {
3263d5a2 1530 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1531
3cc67a4d 1532 if (c >= 0x100)
3263d5a2 1533 return 2;
0282eb69 1534 }
3263d5a2
KH
1535 return 1;
1536}
05505664 1537
05505664 1538
3263d5a2 1539/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1540
3cc67a4d
KH
1541 CHARSETS is a vector. If Nth element is non-nil, it means the
1542 charset whose id is N is already found.
2e344af3 1543
3263d5a2 1544 It may lookup a translation table TABLE if supplied. */
2e344af3 1545
3263d5a2 1546static void
971de7fb 1547find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
3263d5a2 1548{
dbbb237d 1549 const unsigned char *pend = ptr + nbytes;
3263d5a2
KH
1550
1551 if (nchars == nbytes)
3263d5a2 1552 {
3cc67a4d
KH
1553 if (multibyte)
1554 ASET (charsets, charset_ascii, Qt);
1555 else
1556 while (ptr < pend)
1557 {
1558 int c = *ptr++;
1559
1560 if (!NILP (table))
1561 c = translate_char (table, c);
1562 if (ASCII_BYTE_P (c))
1563 ASET (charsets, charset_ascii, Qt);
1564 else
1565 ASET (charsets, charset_eight_bit, Qt);
1566 }
1567 }
1568 else
1569 {
1570 while (ptr < pend)
3263d5a2 1571 {
3cc67a4d
KH
1572 int c = STRING_CHAR_ADVANCE (ptr);
1573 struct charset *charset;
3263d5a2 1574
3cc67a4d
KH
1575 if (!NILP (table))
1576 c = translate_char (table, c);
1577 charset = CHAR_CHARSET (c);
1578 ASET (charsets, CHARSET_ID (charset), Qt);
4ed46869 1579 }
4ed46869 1580 }
4ed46869
KH
1581}
1582
1583DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1584 2, 3, 0,
fdb82f93
PJ
1585 doc: /* Return a list of charsets in the region between BEG and END.
1586BEG and END are buffer positions.
1587Optional arg TABLE if non-nil is a translation table to look up.
1588
fdb82f93
PJ
1589If the current buffer is unibyte, the returned list may contain
1590only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1591 (beg, end, table)
23d2a7f1 1592 Lisp_Object beg, end, table;
4ed46869 1593{
3263d5a2 1594 Lisp_Object charsets;
42ca828e
DL
1595 EMACS_INT from, from_byte, to, stop, stop_byte;
1596 int i;
4ed46869 1597 Lisp_Object val;
3cc67a4d 1598 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
4ed46869
KH
1599
1600 validate_region (&beg, &end);
1601 from = XFASTINT (beg);
1602 stop = to = XFASTINT (end);
6ae1f27e 1603
4ed46869 1604 if (from < GPT && GPT < to)
6ae1f27e
RS
1605 {
1606 stop = GPT;
1607 stop_byte = GPT_BYTE;
1608 }
1609 else
1610 stop_byte = CHAR_TO_BYTE (stop);
1611
1612 from_byte = CHAR_TO_BYTE (from);
1613
3263d5a2 1614 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
4ed46869
KH
1615 while (1)
1616 {
3263d5a2 1617 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
3cc67a4d
KH
1618 stop_byte - from_byte, charsets, table,
1619 multibyte);
4ed46869 1620 if (stop < to)
6ae1f27e
RS
1621 {
1622 from = stop, from_byte = stop_byte;
1623 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1624 }
4ed46869
KH
1625 else
1626 break;
1627 }
6ae1f27e 1628
4ed46869 1629 val = Qnil;
3263d5a2 1630 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1631 if (!NILP (AREF (charsets, i)))
3263d5a2 1632 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1633 return val;
1634}
1635
1636DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1637 1, 2, 0,
fdb82f93
PJ
1638 doc: /* Return a list of charsets in STR.
1639Optional arg TABLE if non-nil is a translation table to look up.
1640
fdb82f93 1641If STR is unibyte, the returned list may contain
3263d5a2 1642only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1643 (str, table)
23d2a7f1 1644 Lisp_Object str, table;
4ed46869 1645{
3263d5a2 1646 Lisp_Object charsets;
4ed46869
KH
1647 int i;
1648 Lisp_Object val;
1649
b7826503 1650 CHECK_STRING (str);
87b089ad 1651
3263d5a2 1652 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
8f924df7 1653 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
3cc67a4d
KH
1654 charsets, table,
1655 STRING_MULTIBYTE (str));
4ed46869 1656 val = Qnil;
3263d5a2 1657 for (i = charset_table_used - 1; i >= 0; i--)
3cc67a4d 1658 if (!NILP (AREF (charsets, i)))
3263d5a2 1659 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1660 return val;
1661}
2e344af3 1662
4ed46869 1663\f
3263d5a2 1664
ecca2aad
KH
1665/* Return a unified character code for C (>= 0x110000). VAL is a
1666 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1667 charset symbol. */
1668int
971de7fb 1669maybe_unify_char (int c, Lisp_Object val)
ecca2aad
KH
1670{
1671 struct charset *charset;
1672
1673 if (INTEGERP (val))
1674 return XINT (val);
1675 if (NILP (val))
1676 return c;
1677
1678 CHECK_CHARSET_GET_CHARSET (val, charset);
1679 load_charset (charset, 1);
1680 if (! inhibit_load_charset_map)
1681 {
1682 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1683 if (! NILP (val))
1684 c = XINT (val);
1685 }
1686 else
1687 {
1688 int code_index = c - CHARSET_CODE_OFFSET (charset);
1689 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1690
1691 if (unified > 0)
1692 c = unified;
1693 }
1694 return c;
1695}
1696
1697
3263d5a2
KH
1698/* Return a character correponding to the code-point CODE of
1699 CHARSET. */
1700
1701int
971de7fb 1702decode_char (struct charset *charset, unsigned int code)
4ed46869 1703{
3263d5a2
KH
1704 int c, char_index;
1705 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1706
3263d5a2
KH
1707 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1708 return -1;
4ed46869 1709
374c5cfd 1710 if (method == CHARSET_METHOD_SUBSET)
2e344af3 1711 {
374c5cfd
KH
1712 Lisp_Object subset_info;
1713
1714 subset_info = CHARSET_SUBSET (charset);
1715 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1716 code -= XINT (AREF (subset_info, 3));
1717 if (code < XFASTINT (AREF (subset_info, 1))
1718 || code > XFASTINT (AREF (subset_info, 2)))
1719 c = -1;
1720 else
1721 c = DECODE_CHAR (charset, code);
2e344af3 1722 }
374c5cfd 1723 else if (method == CHARSET_METHOD_SUPERSET)
2e344af3 1724 {
3263d5a2 1725 Lisp_Object parents;
4ed46869 1726
374c5cfd 1727 parents = CHARSET_SUPERSET (charset);
3263d5a2
KH
1728 c = -1;
1729 for (; CONSP (parents); parents = XCDR (parents))
1730 {
1731 int id = XINT (XCAR (XCAR (parents)));
1732 int code_offset = XINT (XCDR (XCAR (parents)));
374c5cfd 1733 unsigned this_code = code - code_offset;
4ed46869 1734
3263d5a2
KH
1735 charset = CHARSET_FROM_ID (id);
1736 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1737 break;
1738 }
1739 }
1740 else
ac4137cc 1741 {
3263d5a2 1742 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1743 if (char_index < 0)
1744 return -1;
4ed46869 1745
3263d5a2 1746 if (method == CHARSET_METHOD_MAP)
ac4137cc 1747 {
3263d5a2 1748 Lisp_Object decoder;
4ed46869 1749
3263d5a2
KH
1750 decoder = CHARSET_DECODER (charset);
1751 if (! VECTORP (decoder))
ecca2aad
KH
1752 {
1753 load_charset (charset, 1);
1754 decoder = CHARSET_DECODER (charset);
1755 }
1756 if (VECTORP (decoder))
1757 c = XINT (AREF (decoder, char_index));
1758 else
1759 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
ac4137cc 1760 }
ecca2aad 1761 else /* method == CHARSET_METHOD_OFFSET */
ac4137cc 1762 {
3263d5a2 1763 c = char_index + CHARSET_CODE_OFFSET (charset);
ecca2aad
KH
1764 if (CHARSET_UNIFIED_P (charset)
1765 && c > MAX_UNICODE_CHAR)
1766 MAYBE_UNIFY_CHAR (c);
ac4137cc
KH
1767 }
1768 }
4ed46869 1769
3263d5a2 1770 return c;
90d7b74e
KH
1771}
1772
374c5cfd
KH
1773/* Variable used temporarily by the macro ENCODE_CHAR. */
1774Lisp_Object charset_work;
4ed46869 1775
3263d5a2 1776/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
28c026cd
DL
1777 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1778 use CHARSET's strict_max_char instead of max_char. */
4ed46869 1779
3263d5a2 1780unsigned
971de7fb 1781encode_char (struct charset *charset, int c)
9d3d8cba 1782{
3263d5a2
KH
1783 unsigned code;
1784 enum charset_method method = CHARSET_METHOD (charset);
9d3d8cba 1785
3263d5a2 1786 if (CHARSET_UNIFIED_P (charset))
ac4137cc 1787 {
6809ca75 1788 Lisp_Object deunifier;
ecca2aad 1789 int code_index = -1;
4ed46869 1790
3263d5a2
KH
1791 deunifier = CHARSET_DEUNIFIER (charset);
1792 if (! CHAR_TABLE_P (deunifier))
ac4137cc 1793 {
ecca2aad 1794 load_charset (charset, 2);
3263d5a2 1795 deunifier = CHARSET_DEUNIFIER (charset);
ac4137cc 1796 }
ecca2aad
KH
1797 if (CHAR_TABLE_P (deunifier))
1798 {
1799 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1800
1801 if (INTEGERP (deunified))
1802 code_index = XINT (deunified);
1803 }
1804 else
1805 {
1806 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1807 }
1808 if (code_index >= 0)
1809 c = CHARSET_CODE_OFFSET (charset) + code_index;
ac4137cc 1810 }
9d3d8cba 1811
374c5cfd
KH
1812 if (method == CHARSET_METHOD_SUBSET)
1813 {
1814 Lisp_Object subset_info;
1815 struct charset *this_charset;
1816
1817 subset_info = CHARSET_SUBSET (charset);
1818 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1819 code = ENCODE_CHAR (this_charset, c);
1820 if (code == CHARSET_INVALID_CODE (this_charset)
1821 || code < XFASTINT (AREF (subset_info, 1))
1822 || code > XFASTINT (AREF (subset_info, 2)))
1823 return CHARSET_INVALID_CODE (charset);
1824 code += XINT (AREF (subset_info, 3));
1825 return code;
1826 }
9d3d8cba 1827
374c5cfd 1828 if (method == CHARSET_METHOD_SUPERSET)
859f2b3c 1829 {
3263d5a2 1830 Lisp_Object parents;
d2665018 1831
374c5cfd 1832 parents = CHARSET_SUPERSET (charset);
3263d5a2 1833 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1834 {
3263d5a2
KH
1835 int id = XINT (XCAR (XCAR (parents)));
1836 int code_offset = XINT (XCDR (XCAR (parents)));
1837 struct charset *this_charset = CHARSET_FROM_ID (id);
d2665018 1838
3263d5a2 1839 code = ENCODE_CHAR (this_charset, c);
dbbb237d
KH
1840 if (code != CHARSET_INVALID_CODE (this_charset))
1841 return code + code_offset;
beeedaad 1842 }
3263d5a2
KH
1843 return CHARSET_INVALID_CODE (charset);
1844 }
1bcc1567 1845
15c85a88
KH
1846 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1847 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1848 return CHARSET_INVALID_CODE (charset);
1bcc1567 1849
3263d5a2 1850 if (method == CHARSET_METHOD_MAP)
3f62427c 1851 {
3263d5a2 1852 Lisp_Object encoder;
beeedaad 1853 Lisp_Object val;
9b6a601f 1854
3263d5a2
KH
1855 encoder = CHARSET_ENCODER (charset);
1856 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
b8ebe9dd
KH
1857 {
1858 load_charset (charset, 2);
1859 encoder = CHARSET_ENCODER (charset);
1860 }
1861 if (CHAR_TABLE_P (encoder))
ecca2aad
KH
1862 {
1863 val = CHAR_TABLE_REF (encoder, c);
1864 if (NILP (val))
1865 return CHARSET_INVALID_CODE (charset);
1866 code = XINT (val);
1867 if (! CHARSET_COMPACT_CODES_P (charset))
1868 code = INDEX_TO_CODE_POINT (charset, code);
1869 }
1870 else
1871 {
1872 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1873 code = INDEX_TO_CODE_POINT (charset, code);
1874 }
3263d5a2 1875 }
820ee249 1876 else /* method == CHARSET_METHOD_OFFSET */
beeedaad 1877 {
ecca2aad
KH
1878 int code_index = c - CHARSET_CODE_OFFSET (charset);
1879
1880 code = INDEX_TO_CODE_POINT (charset, code_index);
3f62427c 1881 }
8ac5a9cc 1882
3263d5a2 1883 return code;
4ed46869
KH
1884}
1885
4ed46869 1886
3263d5a2
KH
1887DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1888 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1889Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1890
3263d5a2 1891CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
859f2b3c 1892
3263d5a2 1893Optional argument RESTRICTION specifies a way to map the pair of CCS
d0cf2d48 1894and CODE-POINT to a character. Currently not supported and just ignored. */)
3263d5a2
KH
1895 (charset, code_point, restriction)
1896 Lisp_Object charset, code_point, restriction;
4ed46869 1897{
3263d5a2
KH
1898 int c, id;
1899 unsigned code;
1900 struct charset *charsetp;
859f2b3c 1901
3263d5a2
KH
1902 CHECK_CHARSET_GET_ID (charset, id);
1903 if (CONSP (code_point))
1904 {
8f924df7
KH
1905 CHECK_NATNUM_CAR (code_point);
1906 CHECK_NATNUM_CDR (code_point);
69f8de5b 1907 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2 1908 }
4ed46869
KH
1909 else
1910 {
3263d5a2
KH
1911 CHECK_NATNUM (code_point);
1912 code = XINT (code_point);
4ed46869 1913 }
3263d5a2
KH
1914 charsetp = CHARSET_FROM_ID (id);
1915 c = DECODE_CHAR (charsetp, code);
1916 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1917}
1918
859f2b3c 1919
3263d5a2
KH
1920DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1921 doc: /* Encode the character CH into a code-point of CHARSET.
1922Return nil if CHARSET doesn't include CH.
17e7ef1b 1923
d0cf2d48 1924Optional argument RESTRICTION specifies a way to map CH to a
3263d5a2
KH
1925code-point in CCS. Currently not supported and just ignored. */)
1926 (ch, charset, restriction)
1927 Lisp_Object ch, charset, restriction;
4ed46869 1928{
16fed1fc 1929 int id;
3263d5a2
KH
1930 unsigned code;
1931 struct charset *charsetp;
046b1f03 1932
3263d5a2
KH
1933 CHECK_CHARSET_GET_ID (charset, id);
1934 CHECK_NATNUM (ch);
3263d5a2 1935 charsetp = CHARSET_FROM_ID (id);
16fed1fc 1936 code = ENCODE_CHAR (charsetp, XINT (ch));
3263d5a2
KH
1937 if (code == CHARSET_INVALID_CODE (charsetp))
1938 return Qnil;
1939 if (code > 0x7FFFFFF)
1940 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1941 return make_number (code);
beeedaad
KH
1942}
1943
beeedaad 1944
b121a744
KH
1945DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1946 doc:
1947 /* Return a character of CHARSET whose position codes are CODEn.
1948
1949CODE1 through CODE4 are optional, but if you don't supply sufficient
1950position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1951is specified. */)
b121a744
KH
1952 (charset, code1, code2, code3, code4)
1953 Lisp_Object charset, code1, code2, code3, code4;
beeedaad 1954{
3263d5a2
KH
1955 int id, dimension;
1956 struct charset *charsetp;
b121a744
KH
1957 unsigned code;
1958 int c;
87b089ad 1959
3263d5a2
KH
1960 CHECK_CHARSET_GET_ID (charset, id);
1961 charsetp = CHARSET_FROM_ID (id);
4ed46869 1962
b121a744
KH
1963 dimension = CHARSET_DIMENSION (charsetp);
1964 if (NILP (code1))
d47073ca
KH
1965 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1966 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1967 else
859f2b3c 1968 {
b121a744
KH
1969 CHECK_NATNUM (code1);
1970 if (XFASTINT (code1) >= 0x100)
1971 args_out_of_range (make_number (0xFF), code1);
1972 code = XFASTINT (code1);
859f2b3c 1973
b0a1e45e 1974 if (dimension > 1)
beeedaad 1975 {
b121a744 1976 code <<= 8;
b0a1e45e
KH
1977 if (NILP (code2))
1978 code |= charsetp->code_space[(dimension - 2) * 4];
beeedaad 1979 else
b121a744 1980 {
b0a1e45e
KH
1981 CHECK_NATNUM (code2);
1982 if (XFASTINT (code2) >= 0x100)
1983 args_out_of_range (make_number (0xFF), code2);
1984 code |= XFASTINT (code2);
b121a744 1985 }
99529c2c 1986
b0a1e45e 1987 if (dimension > 2)
b121a744
KH
1988 {
1989 code <<= 8;
b0a1e45e
KH
1990 if (NILP (code3))
1991 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1992 else
1993 {
b0a1e45e
KH
1994 CHECK_NATNUM (code3);
1995 if (XFASTINT (code3) >= 0x100)
1996 args_out_of_range (make_number (0xFF), code3);
1997 code |= XFASTINT (code3);
1998 }
1999
2000 if (dimension > 3)
2001 {
2002 code <<= 8;
2003 if (NILP (code4))
2004 code |= charsetp->code_space[0];
2005 else
2006 {
2007 CHECK_NATNUM (code4);
2008 if (XFASTINT (code4) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code4);
2010 code |= XFASTINT (code4);
2011 }
b121a744
KH
2012 }
2013 }
beeedaad 2014 }
859f2b3c 2015 }
beeedaad 2016
b121a744
KH
2017 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2018 code &= 0x7F7F7F7F;
2019 c = DECODE_CHAR (charsetp, code);
2020 if (c < 0)
2021 error ("Invalid code(s)");
3263d5a2 2022 return make_number (c);
4ed46869
KH
2023}
2024
beeedaad 2025
3263d5a2
KH
2026/* Return the first charset in CHARSET_LIST that contains C.
2027 CHARSET_LIST is a list of charset IDs. If it is nil, use
2028 Vcharset_ordered_list. */
beeedaad 2029
3263d5a2 2030struct charset *
971de7fb 2031char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2e344af3 2032{
8a1816bb
KH
2033 int maybe_null = 0;
2034
3263d5a2
KH
2035 if (NILP (charset_list))
2036 charset_list = Vcharset_ordered_list;
8a1816bb
KH
2037 else
2038 maybe_null = 1;
beeedaad 2039
6c652beb 2040 while (CONSP (charset_list))
2e344af3 2041 {
3263d5a2
KH
2042 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2043 unsigned code = ENCODE_CHAR (charset, c);
beeedaad 2044
3263d5a2 2045 if (code != CHARSET_INVALID_CODE (charset))
beeedaad 2046 {
3263d5a2
KH
2047 if (code_return)
2048 *code_return = code;
2049 return charset;
3f62427c 2050 }
3263d5a2 2051 charset_list = XCDR (charset_list);
c0be27fd
KH
2052 if (! maybe_null
2053 && c <= MAX_UNICODE_CHAR
2054 && EQ (charset_list, Vcharset_non_preferred_head))
6c652beb 2055 return CHARSET_FROM_ID (charset_unicode);
3f62427c 2056 }
8a1816bb
KH
2057 return (maybe_null ? NULL
2058 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
6a9c90ec 2059 : CHARSET_FROM_ID (charset_eight_bit));
3f62427c
KH
2060}
2061
2e344af3 2062
3263d5a2 2063DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
3cc67a4d 2064 doc:
d0cf2d48 2065 /*Return list of charset and one to four position-codes of CH.
3cc67a4d
KH
2066The charset is decided by the current priority order of charsets.
2067A position-code is a byte value of each dimension of the code-point of
d0cf2d48 2068CH in the charset. */)
3263d5a2
KH
2069 (ch)
2070 Lisp_Object ch;
4ed46869 2071{
3263d5a2
KH
2072 struct charset *charset;
2073 int c, dimension;
2074 unsigned code;
4ed46869
KH
2075 Lisp_Object val;
2076
3263d5a2
KH
2077 CHECK_CHARACTER (ch);
2078 c = XFASTINT (ch);
2079 charset = CHAR_CHARSET (c);
2080 if (! charset)
3cc67a4d 2081 abort ();
3263d5a2
KH
2082 code = ENCODE_CHAR (charset, c);
2083 if (code == CHARSET_INVALID_CODE (charset))
2084 abort ();
2085 dimension = CHARSET_DIMENSION (charset);
3cc67a4d
KH
2086 for (val = Qnil; dimension > 0; dimension--)
2087 {
2088 val = Fcons (make_number (code & 0xFF), val);
2089 code >>= 8;
2090 }
3263d5a2 2091 return Fcons (CHARSET_NAME (charset), val);
4ed46869
KH
2092}
2093
740f080d 2094
4cb75c4b
KH
2095DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2096 doc: /* Return the charset of highest priority that contains CH.
2097If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2098from which to find the charset. It may also be a coding system. In
2099that case, find the charset from what supported by that coding system. */)
2100 (ch, restriction)
2101 Lisp_Object ch, restriction;
4ed46869 2102{
3263d5a2 2103 struct charset *charset;
4ed46869 2104
3263d5a2 2105 CHECK_CHARACTER (ch);
4cb75c4b
KH
2106 if (NILP (restriction))
2107 charset = CHAR_CHARSET (XINT (ch));
2108 else
2109 {
2110 Lisp_Object charset_list;
2111
2112 if (CONSP (restriction))
2113 {
2114 for (charset_list = Qnil; CONSP (restriction);
2115 restriction = XCDR (restriction))
2116 {
2117 int id;
2118
2119 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2120 charset_list = Fcons (make_number (id), charset_list);
2121 }
2122 charset_list = Fnreverse (charset_list);
2123 }
2124 else
2125 charset_list = coding_system_charset_list (restriction);
2126 charset = char_charset (XINT (ch), charset_list, NULL);
2127 if (! charset)
2128 return Qnil;
2129 }
3263d5a2 2130 return (CHARSET_NAME (charset));
4ed46869
KH
2131}
2132
17e7ef1b 2133
3263d5a2
KH
2134DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2135 doc: /*
2136Return charset of a character in the current buffer at position POS.
2137If POS is nil, it defauls to the current point.
2138If POS is out of range, the value is nil. */)
2139 (pos)
2140 Lisp_Object pos;
2e344af3 2141{
3263d5a2
KH
2142 Lisp_Object ch;
2143 struct charset *charset;
046b1f03 2144
3263d5a2
KH
2145 ch = Fchar_after (pos);
2146 if (! INTEGERP (ch))
2147 return ch;
2148 charset = CHAR_CHARSET (XINT (ch));
2149 return (CHARSET_NAME (charset));
6ae1f27e 2150}
9036eb45 2151
87b089ad 2152
3263d5a2
KH
2153DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2154 doc: /*
2155Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2156
2157ISO 2022's designation sequence (escape sequence) distinguishes charsets
2158by their DIMENSION, CHARS, and FINAL-CHAR,
d0cf2d48 2159whereas Emacs distinguishes them by charset symbol.
3263d5a2
KH
2160See the documentation of the function `charset-info' for the meanings of
2161DIMENSION, CHARS, and FINAL-CHAR. */)
2162 (dimension, chars, final_char)
2163 Lisp_Object dimension, chars, final_char;
6ae1f27e 2164{
3263d5a2 2165 int id;
82215ce9 2166 int chars_flag;
a8a35e61 2167
3263d5a2 2168 check_iso_charset_parameter (dimension, chars, final_char);
82215ce9
KH
2169 chars_flag = XFASTINT (chars) == 96;
2170 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
3263d5a2
KH
2171 XFASTINT (final_char));
2172 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
046b1f03
RS
2173}
2174
87b089ad 2175
3263d5a2
KH
2176DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2177 0, 0, 0,
2178 doc: /*
ecca2aad
KH
2179Internal use only.
2180Clear temporary charset mapping tables.
2181It should be called only from temacs invoked for dumping. */)
3263d5a2 2182 ()
87b089ad 2183{
ecca2aad 2184 if (temp_charset_work)
87b089ad 2185 {
ecca2aad
KH
2186 free (temp_charset_work);
2187 temp_charset_work = NULL;
2e344af3 2188 }
2e344af3 2189
ecca2aad
KH
2190 if (CHAR_TABLE_P (Vchar_unify_table))
2191 Foptimize_char_table (Vchar_unify_table, Qnil);
740f080d 2192
3263d5a2 2193 return Qnil;
740f080d
KH
2194}
2195
8ddf5e57
DL
2196DEFUN ("charset-priority-list", Fcharset_priority_list,
2197 Scharset_priority_list, 0, 1, 0,
2198 doc: /* Return the list of charsets ordered by priority.
2199HIGHESTP non-nil means just return the highest priority one. */)
2200 (highestp)
2201 Lisp_Object highestp;
2e344af3 2202{
8ddf5e57 2203 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2e344af3 2204
8ddf5e57 2205 if (!NILP (highestp))
16fed1fc 2206 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2e344af3 2207
8ddf5e57 2208 while (!NILP (list))
2e344af3 2209 {
16fed1fc 2210 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
8ddf5e57 2211 list = XCDR (list);
2e344af3 2212 }
8ddf5e57 2213 return Fnreverse (val);
2e344af3
KH
2214}
2215
8ddf5e57
DL
2216DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2217 1, MANY, 0,
2218 doc: /* Assign higher priority to the charsets given as arguments.
2219usage: (set-charset-priority &rest charsets) */)
2220 (nargs, args)
2221 int nargs;
4ed46869
KH
2222 Lisp_Object *args;
2223{
af7c60ca 2224 Lisp_Object new_head, old_list, arglist[2];
321c819c 2225 Lisp_Object list_2022, list_emacs_mule;
16fed1fc 2226 int i, id;
4ed46869 2227
8ddf5e57 2228 old_list = Fcopy_sequence (Vcharset_ordered_list);
af7c60ca 2229 new_head = Qnil;
8ddf5e57 2230 for (i = 0; i < nargs; i++)
4ed46869 2231 {
8ddf5e57 2232 CHECK_CHARSET_GET_ID (args[i], id);
af7c60ca
KH
2233 if (! NILP (Fmemq (make_number (id), old_list)))
2234 {
2235 old_list = Fdelq (make_number (id), old_list);
2236 new_head = Fcons (make_number (id), new_head);
2237 }
5729c92f 2238 }
8ddf5e57 2239 arglist[0] = Fnreverse (new_head);
6a9c90ec 2240 arglist[1] = Vcharset_non_preferred_head = old_list;
8ddf5e57 2241 Vcharset_ordered_list = Fnconc (2, arglist);
dbbb237d 2242 charset_ordered_list_tick++;
5729c92f 2243
6809ca75 2244 charset_unibyte = -1;
321c819c 2245 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
d017b41e 2246 CONSP (old_list); old_list = XCDR (old_list))
5729c92f 2247 {
e77415b0 2248 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
321c819c
KH
2249 list_2022 = Fcons (XCAR (old_list), list_2022);
2250 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2251 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
6809ca75
KH
2252 if (charset_unibyte < 0)
2253 {
2254 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2255
2256 if (CHARSET_DIMENSION (charset) == 1
2257 && CHARSET_ASCII_COMPATIBLE_P (charset)
2258 && CHARSET_MAX_CHAR (charset) >= 0x80)
2259 charset_unibyte = CHARSET_ID (charset);
2260 }
4ed46869 2261 }
321c819c
KH
2262 Viso_2022_charset_list = Fnreverse (list_2022);
2263 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
6809ca75
KH
2264 if (charset_unibyte < 0)
2265 charset_unibyte = charset_iso_8859_1;
4ed46869 2266
8ddf5e57 2267 return Qnil;
4ed46869
KH
2268}
2269
d5b33309
KH
2270DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2271 0, 1, 0,
2272 doc: /* Internal use only.
2273Return charset identification number of CHARSET. */)
2274 (charset)
2275 Lisp_Object charset;
4ed46869 2276{
d5b33309 2277 int id;
4ed46869 2278
d5b33309
KH
2279 CHECK_CHARSET_GET_ID (charset, id);
2280 return make_number (id);
4ed46869
KH
2281}
2282
4ed46869 2283\f
3263d5a2 2284void
971de7fb 2285init_charset (void)
4ed46869 2286{
c8f94403
GM
2287 Lisp_Object tempdir;
2288 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
6809ca75 2289 if (access ((char *) SDATA (tempdir), 0) < 0)
c8f94403
GM
2290 {
2291 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2292Emacs will not function correctly without the character map files.\n\
2293Please check your installation!\n",
2294 tempdir);
2295 /* TODO should this be a fatal error? (Bug#909) */
2296 }
2297
2298 Vcharset_map_path = Fcons (tempdir, Qnil);
4ed46869
KH
2299}
2300
4ed46869 2301
dfcf069d 2302void
971de7fb 2303init_charset_once (void)
4ed46869
KH
2304{
2305 int i, j, k;
2306
3263d5a2
KH
2307 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2308 for (j = 0; j < ISO_MAX_CHARS; j++)
2309 for (k = 0; k < ISO_MAX_FINAL; k++)
2310 iso_charset_table[i][j][k] = -1;
4ed46869 2311
60383934 2312 for (i = 0; i < 256; i++)
3263d5a2 2313 emacs_mule_charset[i] = NULL;
4ed46869 2314
7c7dceee
KH
2315 charset_jisx0201_roman = -1;
2316 charset_jisx0208_1978 = -1;
2317 charset_jisx0208 = -1;
d32320c4 2318 charset_ksc5601 = -1;
4ed46869
KH
2319}
2320
2321#ifdef emacs
2322
dfcf069d 2323void
971de7fb 2324syms_of_charset (void)
4ed46869 2325{
3263d5a2
KH
2326 DEFSYM (Qcharsetp, "charsetp");
2327
2328 DEFSYM (Qascii, "ascii");
2329 DEFSYM (Qunicode, "unicode");
6c652beb 2330 DEFSYM (Qemacs, "emacs");
2fe1edd1 2331 DEFSYM (Qeight_bit, "eight-bit");
3263d5a2
KH
2332 DEFSYM (Qiso_8859_1, "iso-8859-1");
2333
2334 DEFSYM (Qgl, "gl");
2335 DEFSYM (Qgr, "gr");
2336
3263d5a2
KH
2337 staticpro (&Vcharset_ordered_list);
2338 Vcharset_ordered_list = Qnil;
2339
2340 staticpro (&Viso_2022_charset_list);
2341 Viso_2022_charset_list = Qnil;
2342
2343 staticpro (&Vemacs_mule_charset_list);
2344 Vemacs_mule_charset_list = Qnil;
2345
3943ed76
KH
2346 /* Don't staticpro them here. It's done in syms_of_fns. */
2347 QCtest = intern (":test");
2348 Qeq = intern ("eq");
2349
3263d5a2 2350 staticpro (&Vcharset_hash_table);
8f924df7
KH
2351 {
2352 Lisp_Object args[2];
2353 args[0] = QCtest;
2354 args[1] = Qeq;
2355 Vcharset_hash_table = Fmake_hash_table (2, args);
2356 }
3263d5a2
KH
2357
2358 charset_table_size = 128;
2359 charset_table = ((struct charset *)
2360 xmalloc (sizeof (struct charset) * charset_table_size));
2361 charset_table_used = 0;
2362
3263d5a2
KH
2363 defsubr (&Scharsetp);
2364 defsubr (&Smap_charset_chars);
2365 defsubr (&Sdefine_charset_internal);
2366 defsubr (&Sdefine_charset_alias);
3263d5a2
KH
2367 defsubr (&Scharset_plist);
2368 defsubr (&Sset_charset_plist);
2369 defsubr (&Sunify_charset);
3fac5a51 2370 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
2371 defsubr (&Sdeclare_equiv_charset);
2372 defsubr (&Sfind_charset_region);
2373 defsubr (&Sfind_charset_string);
3263d5a2
KH
2374 defsubr (&Sdecode_char);
2375 defsubr (&Sencode_char);
4ed46869 2376 defsubr (&Ssplit_char);
3263d5a2 2377 defsubr (&Smake_char);
4ed46869 2378 defsubr (&Schar_charset);
90d7b74e 2379 defsubr (&Scharset_after);
4ed46869 2380 defsubr (&Siso_charset);
3263d5a2 2381 defsubr (&Sclear_charset_maps);
8ddf5e57
DL
2382 defsubr (&Scharset_priority_list);
2383 defsubr (&Sset_charset_priority);
d5b33309 2384 defsubr (&Scharset_id_internal);
3263d5a2 2385
4beef065 2386 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
6f3122a7 2387 doc: /* *List of directories to search for charset map files. */);
4beef065 2388 Vcharset_map_path = Qnil;
4ed46869 2389
ecca2aad
KH
2390 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2391 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2392 inhibit_load_charset_map = 0;
2393
4ed46869 2394 DEFVAR_LISP ("charset-list", &Vcharset_list,
528623a0 2395 doc: /* List of all charsets ever defined. */);
3263d5a2
KH
2396 Vcharset_list = Qnil;
2397
6a9c90ec
KH
2398 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2399 doc: /* ISO639 language mnemonic symbol for the current language environment.
2400If the current language environment is for multiple languages (e.g. "Latin-1"),
2401the value may be a list of mnemonics. */);
2402 Vcurrent_iso639_language = Qnil;
2403
2fe1edd1
KH
2404 charset_ascii
2405 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2406 0, 127, 'B', -1, 0, 1, 0, 0);
14e3d523
KH
2407 charset_iso_8859_1
2408 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2409 0, 255, -1, -1, -1, 1, 0, 0);
2fe1edd1 2410 charset_unicode
73fbf2d9 2411 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2fe1edd1 2412 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
6c652beb
KH
2413 charset_emacs
2414 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2415 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2fe1edd1
KH
2416 charset_eight_bit
2417 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
880820fe 2418 128, 255, -1, 0, -1, 0, 1,
2fe1edd1 2419 MAX_5_BYTE_CHAR + 1);
6809ca75 2420 charset_unibyte = charset_iso_8859_1;
4ed46869
KH
2421}
2422
2423#endif /* emacs */
cefd8c4f
KH
2424
2425/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2426 (do not change this comment) */