Replace bcopy, bzero, bcmp by memcpy, memmove, memset, memcmp
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
60
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
64
65 /* Table of struct charset. */
66 struct charset *charset_table;
67
68 static int charset_table_size;
69 static int charset_table_used;
70
71 Lisp_Object Qcharsetp;
72
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
86
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
92
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
95
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
98
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
101
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
105
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
110
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
113
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
116
117 struct charset *emacs_mule_charset[256];
118
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
123 Lisp_Object Vcharset_map_path;
124
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
127
128 Lisp_Object Vcurrent_iso639_language;
129
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to);
136
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
153
154
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
171
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
174
175 static struct
176 {
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
206
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
229 \f
230
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
234
235 struct charset_map_entries
236 {
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242 };
243
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
253
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
259
260 If CONTROL_FLAG is 2, setup the following tables.
261
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 */
268
269 static void
270 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
271 {
272 Lisp_Object vec, table;
273 unsigned max_code = CHARSET_MAX_CODE (charset);
274 int ascii_compatible_p = charset->ascii_compatible_p;
275 int min_char, max_char, nonascii_min_char;
276 int i;
277 unsigned char *fast_map = charset->fast_map;
278
279 if (n_entries <= 0)
280 return;
281
282 if (control_flag)
283 {
284 if (! inhibit_load_charset_map)
285 {
286 if (control_flag == 1)
287 {
288 if (charset->method == CHARSET_METHOD_MAP)
289 {
290 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
291
292 vec = CHARSET_DECODER (charset)
293 = Fmake_vector (make_number (n), make_number (-1));
294 }
295 else
296 {
297 char_table_set_range (Vchar_unify_table,
298 charset->min_char, charset->max_char,
299 Qnil);
300 }
301 }
302 else
303 {
304 table = Fmake_char_table (Qnil, Qnil);
305 if (charset->method == CHARSET_METHOD_MAP)
306 CHARSET_ENCODER (charset) = table;
307 else
308 CHARSET_DEUNIFIER (charset) = table;
309 }
310 }
311 else
312 {
313 if (! temp_charset_work)
314 temp_charset_work = malloc (sizeof (*temp_charset_work));
315 if (control_flag == 1)
316 {
317 memset (temp_charset_work->table.decoder, -1,
318 sizeof (int) * 0x10000);
319 }
320 else
321 {
322 memset (temp_charset_work->table.encoder, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work->zero_index_char = -1;
325 }
326 temp_charset_work->current = charset;
327 temp_charset_work->for_encoder = (control_flag == 2);
328 control_flag += 2;
329 }
330 charset_map_loaded = 1;
331 }
332
333 min_char = max_char = entries->entry[0].c;
334 nonascii_min_char = MAX_CHAR;
335 for (i = 0; i < n_entries; i++)
336 {
337 unsigned from, to;
338 int from_index, to_index;
339 int from_c, to_c;
340 int idx = i % 0x10000;
341
342 if (i > 0 && idx == 0)
343 entries = entries->next;
344 from = entries->entry[idx].from;
345 to = entries->entry[idx].to;
346 from_c = entries->entry[idx].c;
347 from_index = CODE_POINT_TO_INDEX (charset, from);
348 if (from == to)
349 {
350 to_index = from_index;
351 to_c = from_c;
352 }
353 else
354 {
355 to_index = CODE_POINT_TO_INDEX (charset, to);
356 to_c = from_c + (to_index - from_index);
357 }
358 if (from_index < 0 || to_index < 0)
359 continue;
360
361 if (to_c > max_char)
362 max_char = to_c;
363 else if (from_c < min_char)
364 min_char = from_c;
365
366 if (control_flag == 1)
367 {
368 if (charset->method == CHARSET_METHOD_MAP)
369 for (; from_index <= to_index; from_index++, from_c++)
370 ASET (vec, from_index, make_number (from_c));
371 else
372 for (; from_index <= to_index; from_index++, from_c++)
373 CHAR_TABLE_SET (Vchar_unify_table,
374 CHARSET_CODE_OFFSET (charset) + from_index,
375 make_number (from_c));
376 }
377 else if (control_flag == 2)
378 {
379 if (charset->method == CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset))
381 for (; from_index <= to_index; from_index++, from_c++)
382 {
383 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
384
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (code));
387 }
388 else
389 for (; from_index <= to_index; from_index++, from_c++)
390 {
391 if (NILP (CHAR_TABLE_REF (table, from_c)))
392 CHAR_TABLE_SET (table, from_c, make_number (from_index));
393 }
394 }
395 else if (control_flag == 3)
396 for (; from_index <= to_index; from_index++, from_c++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
398 else if (control_flag == 4)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
401 else /* control_flag == 0 */
402 {
403 if (ascii_compatible_p)
404 {
405 if (! ASCII_BYTE_P (from_c))
406 {
407 if (from_c < nonascii_min_char)
408 nonascii_min_char = from_c;
409 }
410 else if (! ASCII_BYTE_P (to_c))
411 {
412 nonascii_min_char = 0x80;
413 }
414 }
415
416 for (; from_c <= to_c; from_c++)
417 CHARSET_FAST_MAP_SET (from_c, fast_map);
418 }
419 }
420
421 if (control_flag == 0)
422 {
423 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
424 ? nonascii_min_char : min_char);
425 CHARSET_MAX_CHAR (charset) = max_char;
426 }
427 else if (control_flag == 4)
428 {
429 temp_charset_work->min_char = min_char;
430 temp_charset_work->max_char = max_char;
431 }
432 }
433
434
435 /* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
437
438 static INLINE unsigned
439 read_hex (FILE *fp, int *eof)
440 {
441 int c;
442 unsigned n;
443
444 while ((c = getc (fp)) != EOF)
445 {
446 if (c == '#')
447 {
448 while ((c = getc (fp)) != EOF && c != '\n');
449 }
450 else if (c == '0')
451 {
452 if ((c = getc (fp)) == EOF || c == 'x')
453 break;
454 }
455 }
456 if (c == EOF)
457 {
458 *eof = 1;
459 return 0;
460 }
461 *eof = 0;
462 n = 0;
463 if (c == 'x')
464 while ((c = getc (fp)) != EOF && isxdigit (c))
465 n = ((n << 4)
466 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
467 else
468 while ((c = getc (fp)) != EOF && isdigit (c))
469 n = (n * 10) + c - '0';
470 if (c != EOF)
471 ungetc (c, fp);
472 return n;
473 }
474
475 extern Lisp_Object Qfile_name_handler_alist;
476
477 /* Return a mapping vector for CHARSET loaded from MAPFILE.
478 Each line of MAPFILE has this form
479 0xAAAA 0xCCCC
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
482 0xAAAA-0xBBBB 0xCCCC
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
485
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
488 where CODE1 is a code-point or a cons of code-points specifying a
489 range.
490
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
493
494 extern void add_to_log (char *, Lisp_Object, Lisp_Object);
495
496 static void
497 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
498 {
499 unsigned min_code = CHARSET_MIN_CODE (charset);
500 unsigned max_code = CHARSET_MAX_CODE (charset);
501 int fd;
502 FILE *fp;
503 int eof;
504 Lisp_Object suffixes;
505 struct charset_map_entries *head, *entries;
506 int n_entries, count;
507 USE_SAFE_ALLOCA;
508
509 suffixes = Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil));
511
512 count = SPECPDL_INDEX ();
513 specbind (Qfile_name_handler_alist, Qnil);
514 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
515 unbind_to (count, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 error ("Failure in loading charset map: %S", SDATA (mapfile));
519
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head, struct charset_map_entries *,
523 sizeof (struct charset_map_entries));
524 entries = head;
525 memset (entries, 0, sizeof (struct charset_map_entries));
526
527 n_entries = 0;
528 eof = 0;
529 while (1)
530 {
531 unsigned from, to;
532 int c;
533 int idx;
534
535 from = read_hex (fp, &eof);
536 if (eof)
537 break;
538 if (getc (fp) == '-')
539 to = read_hex (fp, &eof);
540 else
541 to = from;
542 c = (int) read_hex (fp, &eof);
543
544 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
545 continue;
546
547 if (n_entries > 0 && (n_entries % 0x10000) == 0)
548 {
549 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
550 sizeof (struct charset_map_entries));
551 entries = entries->next;
552 memset (entries, 0, sizeof (struct charset_map_entries));
553 }
554 idx = n_entries % 0x10000;
555 entries->entry[idx].from = from;
556 entries->entry[idx].to = to;
557 entries->entry[idx].c = c;
558 n_entries++;
559 }
560 fclose (fp);
561
562 load_charset_map (charset, head, n_entries, control_flag);
563 SAFE_FREE ();
564 }
565
566 static void
567 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
568 {
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
575 USE_SAFE_ALLOCA;
576
577 if (len % 2 == 1)
578 {
579 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
580 return;
581 }
582
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head, struct charset_map_entries *,
586 sizeof (struct charset_map_entries));
587 entries = head;
588 memset (entries, 0, sizeof (struct charset_map_entries));
589
590 n_entries = 0;
591 for (i = 0; i < len; i += 2)
592 {
593 Lisp_Object val, val2;
594 unsigned from, to;
595 int c;
596 int idx;
597
598 val = AREF (vec, i);
599 if (CONSP (val))
600 {
601 val2 = XCDR (val);
602 val = XCAR (val);
603 CHECK_NATNUM (val);
604 CHECK_NATNUM (val2);
605 from = XFASTINT (val);
606 to = XFASTINT (val2);
607 }
608 else
609 {
610 CHECK_NATNUM (val);
611 from = to = XFASTINT (val);
612 }
613 val = AREF (vec, i + 1);
614 CHECK_NATNUM (val);
615 c = XFASTINT (val);
616
617 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
618 continue;
619
620 if (n_entries > 0 && (n_entries % 0x10000) == 0)
621 {
622 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
623 sizeof (struct charset_map_entries));
624 entries = entries->next;
625 memset (entries, 0, sizeof (struct charset_map_entries));
626 }
627 idx = n_entries % 0x10000;
628 entries->entry[idx].from = from;
629 entries->entry[idx].to = to;
630 entries->entry[idx].c = c;
631 n_entries++;
632 }
633
634 load_charset_map (charset, head, n_entries, control_flag);
635 SAFE_FREE ();
636 }
637
638
639 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
641
642 static void
643 load_charset (struct charset *charset, int control_flag)
644 {
645 Lisp_Object map;
646
647 if (inhibit_load_charset_map
648 && temp_charset_work
649 && charset == temp_charset_work->current
650 && ((control_flag == 2) == temp_charset_work->for_encoder))
651 return;
652
653 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
654 map = CHARSET_MAP (charset);
655 else if (CHARSET_UNIFIED_P (charset))
656 map = CHARSET_UNIFY_MAP (charset);
657 if (STRINGP (map))
658 load_charset_map_from_file (charset, map, control_flag);
659 else
660 load_charset_map_from_vector (charset, map, control_flag);
661 }
662
663
664 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
665 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
666 (object)
667 Lisp_Object object;
668 {
669 return (CHARSETP (object) ? Qt : Qnil);
670 }
671
672
673 void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
674 Lisp_Object function, Lisp_Object arg,
675 unsigned from, unsigned to);
676
677 void
678 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
679 {
680 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
681 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
682 Lisp_Object range;
683 int c, stop;
684 struct gcpro gcpro1;
685
686 range = Fcons (Qnil, Qnil);
687 GCPRO1 (range);
688
689 c = temp_charset_work->min_char;
690 stop = (temp_charset_work->max_char < 0x20000
691 ? temp_charset_work->max_char : 0xFFFF);
692
693 while (1)
694 {
695 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
696
697 if (index >= from_idx && index <= to_idx)
698 {
699 if (NILP (XCAR (range)))
700 XSETCAR (range, make_number (c));
701 }
702 else if (! NILP (XCAR (range)))
703 {
704 XSETCDR (range, make_number (c - 1));
705 if (c_function)
706 (*c_function) (arg, range);
707 else
708 call2 (function, range, arg);
709 XSETCAR (range, Qnil);
710 }
711 if (c == stop)
712 {
713 if (c == temp_charset_work->max_char)
714 {
715 if (! NILP (XCAR (range)))
716 {
717 XSETCDR (range, make_number (c));
718 if (c_function)
719 (*c_function) (arg, range);
720 else
721 call2 (function, range, arg);
722 }
723 break;
724 }
725 c = 0x1FFFF;
726 stop = temp_charset_work->max_char;
727 }
728 c++;
729 }
730 UNGCPRO;
731 }
732
733 void
734 map_charset_chars (c_function, function, arg,
735 charset, from, to)
736 void (*c_function) (Lisp_Object, Lisp_Object);
737 Lisp_Object function, arg;
738 struct charset *charset;
739 unsigned from, to;
740 {
741 Lisp_Object range;
742 int partial;
743
744 partial = (from > CHARSET_MIN_CODE (charset)
745 || to < CHARSET_MAX_CODE (charset));
746
747 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
748 {
749 int from_idx = CODE_POINT_TO_INDEX (charset, from);
750 int to_idx = CODE_POINT_TO_INDEX (charset, to);
751 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
752 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
753
754 if (CHARSET_UNIFIED_P (charset))
755 {
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
757 load_charset (charset, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
759 map_char_table_for_charset (c_function, function,
760 CHARSET_DEUNIFIER (charset), arg,
761 partial ? charset : NULL, from, to);
762 else
763 map_charset_for_dump (c_function, function, arg, from, to);
764 }
765
766 range = Fcons (make_number (from_c), make_number (to_c));
767 if (NILP (function))
768 (*c_function) (arg, range);
769 else
770 call2 (function, range, arg);
771 }
772 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
773 {
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
775 load_charset (charset, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
777 map_char_table_for_charset (c_function, function,
778 CHARSET_ENCODER (charset), arg,
779 partial ? charset : NULL, from, to);
780 else
781 map_charset_for_dump (c_function, function, arg, from, to);
782 }
783 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
784 {
785 Lisp_Object subset_info;
786 int offset;
787
788 subset_info = CHARSET_SUBSET (charset);
789 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
790 offset = XINT (AREF (subset_info, 3));
791 from -= offset;
792 if (from < XFASTINT (AREF (subset_info, 1)))
793 from = XFASTINT (AREF (subset_info, 1));
794 to -= offset;
795 if (to > XFASTINT (AREF (subset_info, 2)))
796 to = XFASTINT (AREF (subset_info, 2));
797 map_charset_chars (c_function, function, arg, charset, from, to);
798 }
799 else /* i.e. CHARSET_METHOD_SUPERSET */
800 {
801 Lisp_Object parents;
802
803 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
804 parents = XCDR (parents))
805 {
806 int offset;
807 unsigned this_from, this_to;
808
809 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
810 offset = XINT (XCDR (XCAR (parents)));
811 this_from = from > offset ? from - offset : 0;
812 this_to = to > offset ? to - offset : 0;
813 if (this_from < CHARSET_MIN_CODE (charset))
814 this_from = CHARSET_MIN_CODE (charset);
815 if (this_to > CHARSET_MAX_CODE (charset))
816 this_to = CHARSET_MAX_CODE (charset);
817 map_charset_chars (c_function, function, arg, charset,
818 this_from, this_to);
819 }
820 }
821 }
822
823 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
824 doc: /* Call FUNCTION for all characters in CHARSET.
825 FUNCTION is called with an argument RANGE and the optional 3rd
826 argument ARG.
827
828 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829 characters contained in CHARSET.
830
831 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
832 range of code points (in CHARSET) of target characters. */)
833 (function, charset, arg, from_code, to_code)
834 Lisp_Object function, charset, arg, from_code, to_code;
835 {
836 struct charset *cs;
837 unsigned from, to;
838
839 CHECK_CHARSET_GET_CHARSET (charset, cs);
840 if (NILP (from_code))
841 from = CHARSET_MIN_CODE (cs);
842 else
843 {
844 CHECK_NATNUM (from_code);
845 from = XINT (from_code);
846 if (from < CHARSET_MIN_CODE (cs))
847 from = CHARSET_MIN_CODE (cs);
848 }
849 if (NILP (to_code))
850 to = CHARSET_MAX_CODE (cs);
851 else
852 {
853 CHECK_NATNUM (to_code);
854 to = XINT (to_code);
855 if (to > CHARSET_MAX_CODE (cs))
856 to = CHARSET_MAX_CODE (cs);
857 }
858 map_charset_chars (NULL, function, arg, cs, from, to);
859 return Qnil;
860 }
861
862
863 /* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
866 detail. */
867
868 DEFUN ("define-charset-internal", Fdefine_charset_internal,
869 Sdefine_charset_internal, charset_arg_max, MANY, 0,
870 doc: /* For internal use only.
871 usage: (define-charset-internal ...) */)
872 (nargs, args)
873 int nargs;
874 Lisp_Object *args;
875 {
876 /* Charset attr vector. */
877 Lisp_Object attrs;
878 Lisp_Object val;
879 unsigned hash_code;
880 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
881 int i, j;
882 struct charset charset;
883 int id;
884 int dimension;
885 int new_definition_p;
886 int nchars;
887
888 if (nargs != charset_arg_max)
889 return Fsignal (Qwrong_number_of_arguments,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs)));
892
893 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
894
895 CHECK_SYMBOL (args[charset_arg_name]);
896 ASET (attrs, charset_name, args[charset_arg_name]);
897
898 val = args[charset_arg_code_space];
899 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
900 {
901 int min_byte, max_byte;
902
903 min_byte = XINT (Faref (val, make_number (i * 2)));
904 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
905 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
906 error ("Invalid :code-space value");
907 charset.code_space[i * 4] = min_byte;
908 charset.code_space[i * 4 + 1] = max_byte;
909 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
910 nchars *= charset.code_space[i * 4 + 2];
911 charset.code_space[i * 4 + 3] = nchars;
912 if (max_byte > 0)
913 dimension = i + 1;
914 }
915
916 val = args[charset_arg_dimension];
917 if (NILP (val))
918 charset.dimension = dimension;
919 else
920 {
921 CHECK_NATNUM (val);
922 charset.dimension = XINT (val);
923 if (charset.dimension < 1 || charset.dimension > 4)
924 args_out_of_range_3 (val, make_number (1), make_number (4));
925 }
926
927 charset.code_linear_p
928 = (charset.dimension == 1
929 || (charset.code_space[2] == 256
930 && (charset.dimension == 2
931 || (charset.code_space[6] == 256
932 && (charset.dimension == 3
933 || charset.code_space[10] == 256)))));
934
935 if (! charset.code_linear_p)
936 {
937 charset.code_space_mask = (unsigned char *) xmalloc (256);
938 memset (charset.code_space_mask, 0, 256);
939 for (i = 0; i < 4; i++)
940 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
941 j++)
942 charset.code_space_mask[j] |= (1 << i);
943 }
944
945 charset.iso_chars_96 = charset.code_space[2] == 96;
946
947 charset.min_code = (charset.code_space[0]
948 | (charset.code_space[4] << 8)
949 | (charset.code_space[8] << 16)
950 | (charset.code_space[12] << 24));
951 charset.max_code = (charset.code_space[1]
952 | (charset.code_space[5] << 8)
953 | (charset.code_space[9] << 16)
954 | (charset.code_space[13] << 24));
955 charset.char_index_offset = 0;
956
957 val = args[charset_arg_min_code];
958 if (! NILP (val))
959 {
960 unsigned code;
961
962 if (INTEGERP (val))
963 code = XINT (val);
964 else
965 {
966 CHECK_CONS (val);
967 CHECK_NUMBER_CAR (val);
968 CHECK_NUMBER_CDR (val);
969 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
970 }
971 if (code < charset.min_code
972 || code > charset.max_code)
973 args_out_of_range_3 (make_number (charset.min_code),
974 make_number (charset.max_code), val);
975 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
976 charset.min_code = code;
977 }
978
979 val = args[charset_arg_max_code];
980 if (! NILP (val))
981 {
982 unsigned code;
983
984 if (INTEGERP (val))
985 code = XINT (val);
986 else
987 {
988 CHECK_CONS (val);
989 CHECK_NUMBER_CAR (val);
990 CHECK_NUMBER_CDR (val);
991 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
992 }
993 if (code < charset.min_code
994 || code > charset.max_code)
995 args_out_of_range_3 (make_number (charset.min_code),
996 make_number (charset.max_code), val);
997 charset.max_code = code;
998 }
999
1000 charset.compact_codes_p = charset.max_code < 0x10000;
1001
1002 val = args[charset_arg_invalid_code];
1003 if (NILP (val))
1004 {
1005 if (charset.min_code > 0)
1006 charset.invalid_code = 0;
1007 else
1008 {
1009 XSETINT (val, charset.max_code + 1);
1010 if (XINT (val) == charset.max_code + 1)
1011 charset.invalid_code = charset.max_code + 1;
1012 else
1013 error ("Attribute :invalid-code must be specified");
1014 }
1015 }
1016 else
1017 {
1018 CHECK_NATNUM (val);
1019 charset.invalid_code = XFASTINT (val);
1020 }
1021
1022 val = args[charset_arg_iso_final];
1023 if (NILP (val))
1024 charset.iso_final = -1;
1025 else
1026 {
1027 CHECK_NUMBER (val);
1028 if (XINT (val) < '0' || XINT (val) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val));
1030 charset.iso_final = XINT (val);
1031 }
1032
1033 val = args[charset_arg_iso_revision];
1034 if (NILP (val))
1035 charset.iso_revision = -1;
1036 else
1037 {
1038 CHECK_NUMBER (val);
1039 if (XINT (val) > 63)
1040 args_out_of_range (make_number (63), val);
1041 charset.iso_revision = XINT (val);
1042 }
1043
1044 val = args[charset_arg_emacs_mule_id];
1045 if (NILP (val))
1046 charset.emacs_mule_id = -1;
1047 else
1048 {
1049 CHECK_NATNUM (val);
1050 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val));
1052 charset.emacs_mule_id = XINT (val);
1053 }
1054
1055 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1056
1057 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1058
1059 charset.unified_p = 0;
1060
1061 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1062
1063 if (! NILP (args[charset_arg_code_offset]))
1064 {
1065 val = args[charset_arg_code_offset];
1066 CHECK_NUMBER (val);
1067
1068 charset.method = CHARSET_METHOD_OFFSET;
1069 charset.code_offset = XINT (val);
1070
1071 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1072 charset.min_char = i + charset.code_offset;
1073 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1074 charset.max_char = i + charset.code_offset;
1075 if (charset.max_char > MAX_CHAR)
1076 error ("Unsupported max char: %d", charset.max_char);
1077
1078 i = (charset.min_char >> 7) << 7;
1079 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1080 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1081 i = (i >> 12) << 12;
1082 for (; i <= charset.max_char; i += 0x1000)
1083 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1084 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1085 charset.ascii_compatible_p = 1;
1086 }
1087 else if (! NILP (args[charset_arg_map]))
1088 {
1089 val = args[charset_arg_map];
1090 ASET (attrs, charset_map, val);
1091 charset.method = CHARSET_METHOD_MAP;
1092 }
1093 else if (! NILP (args[charset_arg_subset]))
1094 {
1095 Lisp_Object parent;
1096 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1097 struct charset *parent_charset;
1098
1099 val = args[charset_arg_subset];
1100 parent = Fcar (val);
1101 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1102 parent_min_code = Fnth (make_number (1), val);
1103 CHECK_NATNUM (parent_min_code);
1104 parent_max_code = Fnth (make_number (2), val);
1105 CHECK_NATNUM (parent_max_code);
1106 parent_code_offset = Fnth (make_number (3), val);
1107 CHECK_NUMBER (parent_code_offset);
1108 val = Fmake_vector (make_number (4), Qnil);
1109 ASET (val, 0, make_number (parent_charset->id));
1110 ASET (val, 1, parent_min_code);
1111 ASET (val, 2, parent_max_code);
1112 ASET (val, 3, parent_code_offset);
1113 ASET (attrs, charset_subset, val);
1114
1115 charset.method = CHARSET_METHOD_SUBSET;
1116 /* Here, we just copy the parent's fast_map. It's not accurate,
1117 but at least it works for quickly detecting which character
1118 DOESN'T belong to this charset. */
1119 for (i = 0; i < 190; i++)
1120 charset.fast_map[i] = parent_charset->fast_map[i];
1121
1122 /* We also copy these for parents. */
1123 charset.min_char = parent_charset->min_char;
1124 charset.max_char = parent_charset->max_char;
1125 }
1126 else if (! NILP (args[charset_arg_superset]))
1127 {
1128 val = args[charset_arg_superset];
1129 charset.method = CHARSET_METHOD_SUPERSET;
1130 val = Fcopy_sequence (val);
1131 ASET (attrs, charset_superset, val);
1132
1133 charset.min_char = MAX_CHAR;
1134 charset.max_char = 0;
1135 for (; ! NILP (val); val = Fcdr (val))
1136 {
1137 Lisp_Object elt, car_part, cdr_part;
1138 int this_id, offset;
1139 struct charset *this_charset;
1140
1141 elt = Fcar (val);
1142 if (CONSP (elt))
1143 {
1144 car_part = XCAR (elt);
1145 cdr_part = XCDR (elt);
1146 CHECK_CHARSET_GET_ID (car_part, this_id);
1147 CHECK_NUMBER (cdr_part);
1148 offset = XINT (cdr_part);
1149 }
1150 else
1151 {
1152 CHECK_CHARSET_GET_ID (elt, this_id);
1153 offset = 0;
1154 }
1155 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1156
1157 this_charset = CHARSET_FROM_ID (this_id);
1158 if (charset.min_char > this_charset->min_char)
1159 charset.min_char = this_charset->min_char;
1160 if (charset.max_char < this_charset->max_char)
1161 charset.max_char = this_charset->max_char;
1162 for (i = 0; i < 190; i++)
1163 charset.fast_map[i] |= this_charset->fast_map[i];
1164 }
1165 }
1166 else
1167 error ("None of :code-offset, :map, :parents are specified");
1168
1169 val = args[charset_arg_unify_map];
1170 if (! NILP (val) && !STRINGP (val))
1171 CHECK_VECTOR (val);
1172 ASET (attrs, charset_unify_map, val);
1173
1174 CHECK_LIST (args[charset_arg_plist]);
1175 ASET (attrs, charset_plist, args[charset_arg_plist]);
1176
1177 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1178 &hash_code);
1179 if (charset.hash_index >= 0)
1180 {
1181 new_definition_p = 0;
1182 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1183 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1184 }
1185 else
1186 {
1187 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1188 hash_code);
1189 if (charset_table_used == charset_table_size)
1190 {
1191 struct charset *new_table
1192 = (struct charset *) xmalloc (sizeof (struct charset)
1193 * (charset_table_size + 16));
1194 memcpy (new_table, charset_table,
1195 sizeof (struct charset) * charset_table_size);
1196 charset_table_size += 16;
1197 charset_table = new_table;
1198 }
1199 id = charset_table_used++;
1200 new_definition_p = 1;
1201 }
1202
1203 ASET (attrs, charset_id, make_number (id));
1204 charset.id = id;
1205 charset_table[id] = charset;
1206
1207 if (charset.method == CHARSET_METHOD_MAP)
1208 {
1209 load_charset (&charset, 0);
1210 charset_table[id] = charset;
1211 }
1212
1213 if (charset.iso_final >= 0)
1214 {
1215 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1216 charset.iso_final) = id;
1217 if (new_definition_p)
1218 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1219 Fcons (make_number (id), Qnil));
1220 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1221 charset_jisx0201_roman = id;
1222 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1223 charset_jisx0208_1978 = id;
1224 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1225 charset_jisx0208 = id;
1226 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1227 charset_ksc5601 = id;
1228 }
1229
1230 if (charset.emacs_mule_id >= 0)
1231 {
1232 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1233 if (charset.emacs_mule_id < 0xA0)
1234 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1235 else
1236 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1237 if (new_definition_p)
1238 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1239 Fcons (make_number (id), Qnil));
1240 }
1241
1242 if (new_definition_p)
1243 {
1244 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1245 if (charset.supplementary_p)
1246 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1247 Fcons (make_number (id), Qnil));
1248 else
1249 {
1250 Lisp_Object tail;
1251
1252 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1253 {
1254 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1255
1256 if (cs->supplementary_p)
1257 break;
1258 }
1259 if (EQ (tail, Vcharset_ordered_list))
1260 Vcharset_ordered_list = Fcons (make_number (id),
1261 Vcharset_ordered_list);
1262 else if (NILP (tail))
1263 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1264 Fcons (make_number (id), Qnil));
1265 else
1266 {
1267 val = Fcons (XCAR (tail), XCDR (tail));
1268 XSETCDR (tail, val);
1269 XSETCAR (tail, make_number (id));
1270 }
1271 }
1272 charset_ordered_list_tick++;
1273 }
1274
1275 return Qnil;
1276 }
1277
1278
1279 /* Same as Fdefine_charset_internal but arguments are more convenient
1280 to call from C (typically in syms_of_charset). This can define a
1281 charset of `offset' method only. Return the ID of the new
1282 charset. */
1283
1284 static int
1285 define_charset_internal (Lisp_Object name,
1286 int dimension,
1287 unsigned char *code_space,
1288 unsigned min_code, unsigned max_code,
1289 int iso_final, int iso_revision, int emacs_mule_id,
1290 int ascii_compatible, int supplementary,
1291 int code_offset)
1292 {
1293 Lisp_Object args[charset_arg_max];
1294 Lisp_Object plist[14];
1295 Lisp_Object val;
1296 int i;
1297
1298 args[charset_arg_name] = name;
1299 args[charset_arg_dimension] = make_number (dimension);
1300 val = Fmake_vector (make_number (8), make_number (0));
1301 for (i = 0; i < 8; i++)
1302 ASET (val, i, make_number (code_space[i]));
1303 args[charset_arg_code_space] = val;
1304 args[charset_arg_min_code] = make_number (min_code);
1305 args[charset_arg_max_code] = make_number (max_code);
1306 args[charset_arg_iso_final]
1307 = (iso_final < 0 ? Qnil : make_number (iso_final));
1308 args[charset_arg_iso_revision] = make_number (iso_revision);
1309 args[charset_arg_emacs_mule_id]
1310 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1311 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1312 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1313 args[charset_arg_invalid_code] = Qnil;
1314 args[charset_arg_code_offset] = make_number (code_offset);
1315 args[charset_arg_map] = Qnil;
1316 args[charset_arg_subset] = Qnil;
1317 args[charset_arg_superset] = Qnil;
1318 args[charset_arg_unify_map] = Qnil;
1319
1320 plist[0] = intern_c_string (":name");
1321 plist[1] = args[charset_arg_name];
1322 plist[2] = intern_c_string (":dimension");
1323 plist[3] = args[charset_arg_dimension];
1324 plist[4] = intern_c_string (":code-space");
1325 plist[5] = args[charset_arg_code_space];
1326 plist[6] = intern_c_string (":iso-final-char");
1327 plist[7] = args[charset_arg_iso_final];
1328 plist[8] = intern_c_string (":emacs-mule-id");
1329 plist[9] = args[charset_arg_emacs_mule_id];
1330 plist[10] = intern_c_string (":ascii-compatible-p");
1331 plist[11] = args[charset_arg_ascii_compatible_p];
1332 plist[12] = intern_c_string (":code-offset");
1333 plist[13] = args[charset_arg_code_offset];
1334
1335 args[charset_arg_plist] = Flist (14, plist);
1336 Fdefine_charset_internal (charset_arg_max, args);
1337
1338 return XINT (CHARSET_SYMBOL_ID (name));
1339 }
1340
1341
1342 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1343 Sdefine_charset_alias, 2, 2, 0,
1344 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1345 (alias, charset)
1346 Lisp_Object alias, charset;
1347 {
1348 Lisp_Object attr;
1349
1350 CHECK_CHARSET_GET_ATTR (charset, attr);
1351 Fputhash (alias, attr, Vcharset_hash_table);
1352 Vcharset_list = Fcons (alias, Vcharset_list);
1353 return Qnil;
1354 }
1355
1356
1357 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1358 doc: /* Return the property list of CHARSET. */)
1359 (charset)
1360 Lisp_Object charset;
1361 {
1362 Lisp_Object attrs;
1363
1364 CHECK_CHARSET_GET_ATTR (charset, attrs);
1365 return CHARSET_ATTR_PLIST (attrs);
1366 }
1367
1368
1369 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1370 doc: /* Set CHARSET's property list to PLIST. */)
1371 (charset, plist)
1372 Lisp_Object charset, plist;
1373 {
1374 Lisp_Object attrs;
1375
1376 CHECK_CHARSET_GET_ATTR (charset, attrs);
1377 CHARSET_ATTR_PLIST (attrs) = plist;
1378 return plist;
1379 }
1380
1381
1382 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1383 doc: /* Unify characters of CHARSET with Unicode.
1384 This means reading the relevant file and installing the table defined
1385 by CHARSET's `:unify-map' property.
1386
1387 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1388 the same meaning as the `:unify-map' attribute in the function
1389 `define-charset' (which see).
1390
1391 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1392 (charset, unify_map, deunify)
1393 Lisp_Object charset, unify_map, deunify;
1394 {
1395 int id;
1396 struct charset *cs;
1397
1398 CHECK_CHARSET_GET_ID (charset, id);
1399 cs = CHARSET_FROM_ID (id);
1400 if (NILP (deunify)
1401 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1402 : ! CHARSET_UNIFIED_P (cs))
1403 return Qnil;
1404
1405 CHARSET_UNIFIED_P (cs) = 0;
1406 if (NILP (deunify))
1407 {
1408 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1409 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1410 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1411 if (NILP (unify_map))
1412 unify_map = CHARSET_UNIFY_MAP (cs);
1413 else
1414 {
1415 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1416 signal_error ("Bad unify-map", unify_map);
1417 CHARSET_UNIFY_MAP (cs) = unify_map;
1418 }
1419 if (NILP (Vchar_unify_table))
1420 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1421 char_table_set_range (Vchar_unify_table,
1422 cs->min_char, cs->max_char, charset);
1423 CHARSET_UNIFIED_P (cs) = 1;
1424 }
1425 else if (CHAR_TABLE_P (Vchar_unify_table))
1426 {
1427 int min_code = CHARSET_MIN_CODE (cs);
1428 int max_code = CHARSET_MAX_CODE (cs);
1429 int min_char = DECODE_CHAR (cs, min_code);
1430 int max_char = DECODE_CHAR (cs, max_code);
1431
1432 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1433 }
1434
1435 return Qnil;
1436 }
1437
1438 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1439 Sget_unused_iso_final_char, 2, 2, 0,
1440 doc: /*
1441 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1442 DIMENSION is the number of bytes to represent a character: 1 or 2.
1443 CHARS is the number of characters in a dimension: 94 or 96.
1444
1445 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1446 If there's no unused final char for the specified kind of charset,
1447 return nil. */)
1448 (dimension, chars)
1449 Lisp_Object dimension, chars;
1450 {
1451 int final_char;
1452
1453 CHECK_NUMBER (dimension);
1454 CHECK_NUMBER (chars);
1455 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1456 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1457 if (XINT (chars) != 94 && XINT (chars) != 96)
1458 args_out_of_range_3 (chars, make_number (94), make_number (96));
1459 for (final_char = '0'; final_char <= '?'; final_char++)
1460 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1461 break;
1462 return (final_char <= '?' ? make_number (final_char) : Qnil);
1463 }
1464
1465 static void
1466 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1467 {
1468 CHECK_NATNUM (dimension);
1469 CHECK_NATNUM (chars);
1470 CHECK_NATNUM (final_char);
1471
1472 if (XINT (dimension) > 3)
1473 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1474 if (XINT (chars) != 94 && XINT (chars) != 96)
1475 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1476 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1477 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1478 }
1479
1480
1481 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1482 4, 4, 0,
1483 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1484
1485 On decoding by an ISO-2022 base coding system, when a charset
1486 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1487 if CHARSET is designated instead. */)
1488 (dimension, chars, final_char, charset)
1489 Lisp_Object dimension, chars, final_char, charset;
1490 {
1491 int id;
1492 int chars_flag;
1493
1494 CHECK_CHARSET_GET_ID (charset, id);
1495 check_iso_charset_parameter (dimension, chars, final_char);
1496 chars_flag = XINT (chars) == 96;
1497 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1498 return Qnil;
1499 }
1500
1501
1502 /* Return information about charsets in the text at PTR of NBYTES
1503 bytes, which are NCHARS characters. The value is:
1504
1505 0: Each character is represented by one byte. This is always
1506 true for a unibyte string. For a multibyte string, true if
1507 it contains only ASCII characters.
1508
1509 1: No charsets other than ascii, control-1, and latin-1 are
1510 found.
1511
1512 2: Otherwise.
1513 */
1514
1515 int
1516 string_xstring_p (Lisp_Object string)
1517 {
1518 const unsigned char *p = SDATA (string);
1519 const unsigned char *endp = p + SBYTES (string);
1520
1521 if (SCHARS (string) == SBYTES (string))
1522 return 0;
1523
1524 while (p < endp)
1525 {
1526 int c = STRING_CHAR_ADVANCE (p);
1527
1528 if (c >= 0x100)
1529 return 2;
1530 }
1531 return 1;
1532 }
1533
1534
1535 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1536
1537 CHARSETS is a vector. If Nth element is non-nil, it means the
1538 charset whose id is N is already found.
1539
1540 It may lookup a translation table TABLE if supplied. */
1541
1542 static void
1543 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1544 {
1545 const unsigned char *pend = ptr + nbytes;
1546
1547 if (nchars == nbytes)
1548 {
1549 if (multibyte)
1550 ASET (charsets, charset_ascii, Qt);
1551 else
1552 while (ptr < pend)
1553 {
1554 int c = *ptr++;
1555
1556 if (!NILP (table))
1557 c = translate_char (table, c);
1558 if (ASCII_BYTE_P (c))
1559 ASET (charsets, charset_ascii, Qt);
1560 else
1561 ASET (charsets, charset_eight_bit, Qt);
1562 }
1563 }
1564 else
1565 {
1566 while (ptr < pend)
1567 {
1568 int c = STRING_CHAR_ADVANCE (ptr);
1569 struct charset *charset;
1570
1571 if (!NILP (table))
1572 c = translate_char (table, c);
1573 charset = CHAR_CHARSET (c);
1574 ASET (charsets, CHARSET_ID (charset), Qt);
1575 }
1576 }
1577 }
1578
1579 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1580 2, 3, 0,
1581 doc: /* Return a list of charsets in the region between BEG and END.
1582 BEG and END are buffer positions.
1583 Optional arg TABLE if non-nil is a translation table to look up.
1584
1585 If the current buffer is unibyte, the returned list may contain
1586 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1587 (beg, end, table)
1588 Lisp_Object beg, end, table;
1589 {
1590 Lisp_Object charsets;
1591 EMACS_INT from, from_byte, to, stop, stop_byte;
1592 int i;
1593 Lisp_Object val;
1594 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1595
1596 validate_region (&beg, &end);
1597 from = XFASTINT (beg);
1598 stop = to = XFASTINT (end);
1599
1600 if (from < GPT && GPT < to)
1601 {
1602 stop = GPT;
1603 stop_byte = GPT_BYTE;
1604 }
1605 else
1606 stop_byte = CHAR_TO_BYTE (stop);
1607
1608 from_byte = CHAR_TO_BYTE (from);
1609
1610 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1611 while (1)
1612 {
1613 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1614 stop_byte - from_byte, charsets, table,
1615 multibyte);
1616 if (stop < to)
1617 {
1618 from = stop, from_byte = stop_byte;
1619 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1620 }
1621 else
1622 break;
1623 }
1624
1625 val = Qnil;
1626 for (i = charset_table_used - 1; i >= 0; i--)
1627 if (!NILP (AREF (charsets, i)))
1628 val = Fcons (CHARSET_NAME (charset_table + i), val);
1629 return val;
1630 }
1631
1632 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1633 1, 2, 0,
1634 doc: /* Return a list of charsets in STR.
1635 Optional arg TABLE if non-nil is a translation table to look up.
1636
1637 If STR is unibyte, the returned list may contain
1638 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1639 (str, table)
1640 Lisp_Object str, table;
1641 {
1642 Lisp_Object charsets;
1643 int i;
1644 Lisp_Object val;
1645
1646 CHECK_STRING (str);
1647
1648 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1649 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1650 charsets, table,
1651 STRING_MULTIBYTE (str));
1652 val = Qnil;
1653 for (i = charset_table_used - 1; i >= 0; i--)
1654 if (!NILP (AREF (charsets, i)))
1655 val = Fcons (CHARSET_NAME (charset_table + i), val);
1656 return val;
1657 }
1658
1659 \f
1660
1661 /* Return a unified character code for C (>= 0x110000). VAL is a
1662 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1663 charset symbol. */
1664 int
1665 maybe_unify_char (int c, Lisp_Object val)
1666 {
1667 struct charset *charset;
1668
1669 if (INTEGERP (val))
1670 return XINT (val);
1671 if (NILP (val))
1672 return c;
1673
1674 CHECK_CHARSET_GET_CHARSET (val, charset);
1675 load_charset (charset, 1);
1676 if (! inhibit_load_charset_map)
1677 {
1678 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1679 if (! NILP (val))
1680 c = XINT (val);
1681 }
1682 else
1683 {
1684 int code_index = c - CHARSET_CODE_OFFSET (charset);
1685 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1686
1687 if (unified > 0)
1688 c = unified;
1689 }
1690 return c;
1691 }
1692
1693
1694 /* Return a character correponding to the code-point CODE of
1695 CHARSET. */
1696
1697 int
1698 decode_char (struct charset *charset, unsigned int code)
1699 {
1700 int c, char_index;
1701 enum charset_method method = CHARSET_METHOD (charset);
1702
1703 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1704 return -1;
1705
1706 if (method == CHARSET_METHOD_SUBSET)
1707 {
1708 Lisp_Object subset_info;
1709
1710 subset_info = CHARSET_SUBSET (charset);
1711 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1712 code -= XINT (AREF (subset_info, 3));
1713 if (code < XFASTINT (AREF (subset_info, 1))
1714 || code > XFASTINT (AREF (subset_info, 2)))
1715 c = -1;
1716 else
1717 c = DECODE_CHAR (charset, code);
1718 }
1719 else if (method == CHARSET_METHOD_SUPERSET)
1720 {
1721 Lisp_Object parents;
1722
1723 parents = CHARSET_SUPERSET (charset);
1724 c = -1;
1725 for (; CONSP (parents); parents = XCDR (parents))
1726 {
1727 int id = XINT (XCAR (XCAR (parents)));
1728 int code_offset = XINT (XCDR (XCAR (parents)));
1729 unsigned this_code = code - code_offset;
1730
1731 charset = CHARSET_FROM_ID (id);
1732 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1733 break;
1734 }
1735 }
1736 else
1737 {
1738 char_index = CODE_POINT_TO_INDEX (charset, code);
1739 if (char_index < 0)
1740 return -1;
1741
1742 if (method == CHARSET_METHOD_MAP)
1743 {
1744 Lisp_Object decoder;
1745
1746 decoder = CHARSET_DECODER (charset);
1747 if (! VECTORP (decoder))
1748 {
1749 load_charset (charset, 1);
1750 decoder = CHARSET_DECODER (charset);
1751 }
1752 if (VECTORP (decoder))
1753 c = XINT (AREF (decoder, char_index));
1754 else
1755 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1756 }
1757 else /* method == CHARSET_METHOD_OFFSET */
1758 {
1759 c = char_index + CHARSET_CODE_OFFSET (charset);
1760 if (CHARSET_UNIFIED_P (charset)
1761 && c > MAX_UNICODE_CHAR)
1762 MAYBE_UNIFY_CHAR (c);
1763 }
1764 }
1765
1766 return c;
1767 }
1768
1769 /* Variable used temporarily by the macro ENCODE_CHAR. */
1770 Lisp_Object charset_work;
1771
1772 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1773 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1774 use CHARSET's strict_max_char instead of max_char. */
1775
1776 unsigned
1777 encode_char (struct charset *charset, int c)
1778 {
1779 unsigned code;
1780 enum charset_method method = CHARSET_METHOD (charset);
1781
1782 if (CHARSET_UNIFIED_P (charset))
1783 {
1784 Lisp_Object deunifier;
1785 int code_index = -1;
1786
1787 deunifier = CHARSET_DEUNIFIER (charset);
1788 if (! CHAR_TABLE_P (deunifier))
1789 {
1790 load_charset (charset, 2);
1791 deunifier = CHARSET_DEUNIFIER (charset);
1792 }
1793 if (CHAR_TABLE_P (deunifier))
1794 {
1795 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1796
1797 if (INTEGERP (deunified))
1798 code_index = XINT (deunified);
1799 }
1800 else
1801 {
1802 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1803 }
1804 if (code_index >= 0)
1805 c = CHARSET_CODE_OFFSET (charset) + code_index;
1806 }
1807
1808 if (method == CHARSET_METHOD_SUBSET)
1809 {
1810 Lisp_Object subset_info;
1811 struct charset *this_charset;
1812
1813 subset_info = CHARSET_SUBSET (charset);
1814 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1815 code = ENCODE_CHAR (this_charset, c);
1816 if (code == CHARSET_INVALID_CODE (this_charset)
1817 || code < XFASTINT (AREF (subset_info, 1))
1818 || code > XFASTINT (AREF (subset_info, 2)))
1819 return CHARSET_INVALID_CODE (charset);
1820 code += XINT (AREF (subset_info, 3));
1821 return code;
1822 }
1823
1824 if (method == CHARSET_METHOD_SUPERSET)
1825 {
1826 Lisp_Object parents;
1827
1828 parents = CHARSET_SUPERSET (charset);
1829 for (; CONSP (parents); parents = XCDR (parents))
1830 {
1831 int id = XINT (XCAR (XCAR (parents)));
1832 int code_offset = XINT (XCDR (XCAR (parents)));
1833 struct charset *this_charset = CHARSET_FROM_ID (id);
1834
1835 code = ENCODE_CHAR (this_charset, c);
1836 if (code != CHARSET_INVALID_CODE (this_charset))
1837 return code + code_offset;
1838 }
1839 return CHARSET_INVALID_CODE (charset);
1840 }
1841
1842 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1843 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1844 return CHARSET_INVALID_CODE (charset);
1845
1846 if (method == CHARSET_METHOD_MAP)
1847 {
1848 Lisp_Object encoder;
1849 Lisp_Object val;
1850
1851 encoder = CHARSET_ENCODER (charset);
1852 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1853 {
1854 load_charset (charset, 2);
1855 encoder = CHARSET_ENCODER (charset);
1856 }
1857 if (CHAR_TABLE_P (encoder))
1858 {
1859 val = CHAR_TABLE_REF (encoder, c);
1860 if (NILP (val))
1861 return CHARSET_INVALID_CODE (charset);
1862 code = XINT (val);
1863 if (! CHARSET_COMPACT_CODES_P (charset))
1864 code = INDEX_TO_CODE_POINT (charset, code);
1865 }
1866 else
1867 {
1868 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1869 code = INDEX_TO_CODE_POINT (charset, code);
1870 }
1871 }
1872 else /* method == CHARSET_METHOD_OFFSET */
1873 {
1874 int code_index = c - CHARSET_CODE_OFFSET (charset);
1875
1876 code = INDEX_TO_CODE_POINT (charset, code_index);
1877 }
1878
1879 return code;
1880 }
1881
1882
1883 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1884 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1885 Return nil if CODE-POINT is not valid in CHARSET.
1886
1887 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1888
1889 Optional argument RESTRICTION specifies a way to map the pair of CCS
1890 and CODE-POINT to a character. Currently not supported and just ignored. */)
1891 (charset, code_point, restriction)
1892 Lisp_Object charset, code_point, restriction;
1893 {
1894 int c, id;
1895 unsigned code;
1896 struct charset *charsetp;
1897
1898 CHECK_CHARSET_GET_ID (charset, id);
1899 if (CONSP (code_point))
1900 {
1901 CHECK_NATNUM_CAR (code_point);
1902 CHECK_NATNUM_CDR (code_point);
1903 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1904 }
1905 else
1906 {
1907 CHECK_NATNUM (code_point);
1908 code = XINT (code_point);
1909 }
1910 charsetp = CHARSET_FROM_ID (id);
1911 c = DECODE_CHAR (charsetp, code);
1912 return (c >= 0 ? make_number (c) : Qnil);
1913 }
1914
1915
1916 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1917 doc: /* Encode the character CH into a code-point of CHARSET.
1918 Return nil if CHARSET doesn't include CH.
1919
1920 Optional argument RESTRICTION specifies a way to map CH to a
1921 code-point in CCS. Currently not supported and just ignored. */)
1922 (ch, charset, restriction)
1923 Lisp_Object ch, charset, restriction;
1924 {
1925 int id;
1926 unsigned code;
1927 struct charset *charsetp;
1928
1929 CHECK_CHARSET_GET_ID (charset, id);
1930 CHECK_NATNUM (ch);
1931 charsetp = CHARSET_FROM_ID (id);
1932 code = ENCODE_CHAR (charsetp, XINT (ch));
1933 if (code == CHARSET_INVALID_CODE (charsetp))
1934 return Qnil;
1935 if (code > 0x7FFFFFF)
1936 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1937 return make_number (code);
1938 }
1939
1940
1941 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1942 doc:
1943 /* Return a character of CHARSET whose position codes are CODEn.
1944
1945 CODE1 through CODE4 are optional, but if you don't supply sufficient
1946 position codes, it is assumed that the minimum code in each dimension
1947 is specified. */)
1948 (charset, code1, code2, code3, code4)
1949 Lisp_Object charset, code1, code2, code3, code4;
1950 {
1951 int id, dimension;
1952 struct charset *charsetp;
1953 unsigned code;
1954 int c;
1955
1956 CHECK_CHARSET_GET_ID (charset, id);
1957 charsetp = CHARSET_FROM_ID (id);
1958
1959 dimension = CHARSET_DIMENSION (charsetp);
1960 if (NILP (code1))
1961 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1962 ? 0 : CHARSET_MIN_CODE (charsetp));
1963 else
1964 {
1965 CHECK_NATNUM (code1);
1966 if (XFASTINT (code1) >= 0x100)
1967 args_out_of_range (make_number (0xFF), code1);
1968 code = XFASTINT (code1);
1969
1970 if (dimension > 1)
1971 {
1972 code <<= 8;
1973 if (NILP (code2))
1974 code |= charsetp->code_space[(dimension - 2) * 4];
1975 else
1976 {
1977 CHECK_NATNUM (code2);
1978 if (XFASTINT (code2) >= 0x100)
1979 args_out_of_range (make_number (0xFF), code2);
1980 code |= XFASTINT (code2);
1981 }
1982
1983 if (dimension > 2)
1984 {
1985 code <<= 8;
1986 if (NILP (code3))
1987 code |= charsetp->code_space[(dimension - 3) * 4];
1988 else
1989 {
1990 CHECK_NATNUM (code3);
1991 if (XFASTINT (code3) >= 0x100)
1992 args_out_of_range (make_number (0xFF), code3);
1993 code |= XFASTINT (code3);
1994 }
1995
1996 if (dimension > 3)
1997 {
1998 code <<= 8;
1999 if (NILP (code4))
2000 code |= charsetp->code_space[0];
2001 else
2002 {
2003 CHECK_NATNUM (code4);
2004 if (XFASTINT (code4) >= 0x100)
2005 args_out_of_range (make_number (0xFF), code4);
2006 code |= XFASTINT (code4);
2007 }
2008 }
2009 }
2010 }
2011 }
2012
2013 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2014 code &= 0x7F7F7F7F;
2015 c = DECODE_CHAR (charsetp, code);
2016 if (c < 0)
2017 error ("Invalid code(s)");
2018 return make_number (c);
2019 }
2020
2021
2022 /* Return the first charset in CHARSET_LIST that contains C.
2023 CHARSET_LIST is a list of charset IDs. If it is nil, use
2024 Vcharset_ordered_list. */
2025
2026 struct charset *
2027 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2028 {
2029 int maybe_null = 0;
2030
2031 if (NILP (charset_list))
2032 charset_list = Vcharset_ordered_list;
2033 else
2034 maybe_null = 1;
2035
2036 while (CONSP (charset_list))
2037 {
2038 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2039 unsigned code = ENCODE_CHAR (charset, c);
2040
2041 if (code != CHARSET_INVALID_CODE (charset))
2042 {
2043 if (code_return)
2044 *code_return = code;
2045 return charset;
2046 }
2047 charset_list = XCDR (charset_list);
2048 if (! maybe_null
2049 && c <= MAX_UNICODE_CHAR
2050 && EQ (charset_list, Vcharset_non_preferred_head))
2051 return CHARSET_FROM_ID (charset_unicode);
2052 }
2053 return (maybe_null ? NULL
2054 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2055 : CHARSET_FROM_ID (charset_eight_bit));
2056 }
2057
2058
2059 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2060 doc:
2061 /*Return list of charset and one to four position-codes of CH.
2062 The charset is decided by the current priority order of charsets.
2063 A position-code is a byte value of each dimension of the code-point of
2064 CH in the charset. */)
2065 (ch)
2066 Lisp_Object ch;
2067 {
2068 struct charset *charset;
2069 int c, dimension;
2070 unsigned code;
2071 Lisp_Object val;
2072
2073 CHECK_CHARACTER (ch);
2074 c = XFASTINT (ch);
2075 charset = CHAR_CHARSET (c);
2076 if (! charset)
2077 abort ();
2078 code = ENCODE_CHAR (charset, c);
2079 if (code == CHARSET_INVALID_CODE (charset))
2080 abort ();
2081 dimension = CHARSET_DIMENSION (charset);
2082 for (val = Qnil; dimension > 0; dimension--)
2083 {
2084 val = Fcons (make_number (code & 0xFF), val);
2085 code >>= 8;
2086 }
2087 return Fcons (CHARSET_NAME (charset), val);
2088 }
2089
2090
2091 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2092 doc: /* Return the charset of highest priority that contains CH.
2093 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2094 from which to find the charset. It may also be a coding system. In
2095 that case, find the charset from what supported by that coding system. */)
2096 (ch, restriction)
2097 Lisp_Object ch, restriction;
2098 {
2099 struct charset *charset;
2100
2101 CHECK_CHARACTER (ch);
2102 if (NILP (restriction))
2103 charset = CHAR_CHARSET (XINT (ch));
2104 else
2105 {
2106 Lisp_Object charset_list;
2107
2108 if (CONSP (restriction))
2109 {
2110 for (charset_list = Qnil; CONSP (restriction);
2111 restriction = XCDR (restriction))
2112 {
2113 int id;
2114
2115 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2116 charset_list = Fcons (make_number (id), charset_list);
2117 }
2118 charset_list = Fnreverse (charset_list);
2119 }
2120 else
2121 charset_list = coding_system_charset_list (restriction);
2122 charset = char_charset (XINT (ch), charset_list, NULL);
2123 if (! charset)
2124 return Qnil;
2125 }
2126 return (CHARSET_NAME (charset));
2127 }
2128
2129
2130 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2131 doc: /*
2132 Return charset of a character in the current buffer at position POS.
2133 If POS is nil, it defauls to the current point.
2134 If POS is out of range, the value is nil. */)
2135 (pos)
2136 Lisp_Object pos;
2137 {
2138 Lisp_Object ch;
2139 struct charset *charset;
2140
2141 ch = Fchar_after (pos);
2142 if (! INTEGERP (ch))
2143 return ch;
2144 charset = CHAR_CHARSET (XINT (ch));
2145 return (CHARSET_NAME (charset));
2146 }
2147
2148
2149 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2150 doc: /*
2151 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2152
2153 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2154 by their DIMENSION, CHARS, and FINAL-CHAR,
2155 whereas Emacs distinguishes them by charset symbol.
2156 See the documentation of the function `charset-info' for the meanings of
2157 DIMENSION, CHARS, and FINAL-CHAR. */)
2158 (dimension, chars, final_char)
2159 Lisp_Object dimension, chars, final_char;
2160 {
2161 int id;
2162 int chars_flag;
2163
2164 check_iso_charset_parameter (dimension, chars, final_char);
2165 chars_flag = XFASTINT (chars) == 96;
2166 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2167 XFASTINT (final_char));
2168 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2169 }
2170
2171
2172 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2173 0, 0, 0,
2174 doc: /*
2175 Internal use only.
2176 Clear temporary charset mapping tables.
2177 It should be called only from temacs invoked for dumping. */)
2178 ()
2179 {
2180 if (temp_charset_work)
2181 {
2182 free (temp_charset_work);
2183 temp_charset_work = NULL;
2184 }
2185
2186 if (CHAR_TABLE_P (Vchar_unify_table))
2187 Foptimize_char_table (Vchar_unify_table, Qnil);
2188
2189 return Qnil;
2190 }
2191
2192 DEFUN ("charset-priority-list", Fcharset_priority_list,
2193 Scharset_priority_list, 0, 1, 0,
2194 doc: /* Return the list of charsets ordered by priority.
2195 HIGHESTP non-nil means just return the highest priority one. */)
2196 (highestp)
2197 Lisp_Object highestp;
2198 {
2199 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2200
2201 if (!NILP (highestp))
2202 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2203
2204 while (!NILP (list))
2205 {
2206 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2207 list = XCDR (list);
2208 }
2209 return Fnreverse (val);
2210 }
2211
2212 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2213 1, MANY, 0,
2214 doc: /* Assign higher priority to the charsets given as arguments.
2215 usage: (set-charset-priority &rest charsets) */)
2216 (nargs, args)
2217 int nargs;
2218 Lisp_Object *args;
2219 {
2220 Lisp_Object new_head, old_list, arglist[2];
2221 Lisp_Object list_2022, list_emacs_mule;
2222 int i, id;
2223
2224 old_list = Fcopy_sequence (Vcharset_ordered_list);
2225 new_head = Qnil;
2226 for (i = 0; i < nargs; i++)
2227 {
2228 CHECK_CHARSET_GET_ID (args[i], id);
2229 if (! NILP (Fmemq (make_number (id), old_list)))
2230 {
2231 old_list = Fdelq (make_number (id), old_list);
2232 new_head = Fcons (make_number (id), new_head);
2233 }
2234 }
2235 arglist[0] = Fnreverse (new_head);
2236 arglist[1] = Vcharset_non_preferred_head = old_list;
2237 Vcharset_ordered_list = Fnconc (2, arglist);
2238 charset_ordered_list_tick++;
2239
2240 charset_unibyte = -1;
2241 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2242 CONSP (old_list); old_list = XCDR (old_list))
2243 {
2244 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2245 list_2022 = Fcons (XCAR (old_list), list_2022);
2246 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2247 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2248 if (charset_unibyte < 0)
2249 {
2250 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2251
2252 if (CHARSET_DIMENSION (charset) == 1
2253 && CHARSET_ASCII_COMPATIBLE_P (charset)
2254 && CHARSET_MAX_CHAR (charset) >= 0x80)
2255 charset_unibyte = CHARSET_ID (charset);
2256 }
2257 }
2258 Viso_2022_charset_list = Fnreverse (list_2022);
2259 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2260 if (charset_unibyte < 0)
2261 charset_unibyte = charset_iso_8859_1;
2262
2263 return Qnil;
2264 }
2265
2266 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2267 0, 1, 0,
2268 doc: /* Internal use only.
2269 Return charset identification number of CHARSET. */)
2270 (charset)
2271 Lisp_Object charset;
2272 {
2273 int id;
2274
2275 CHECK_CHARSET_GET_ID (charset, id);
2276 return make_number (id);
2277 }
2278
2279 \f
2280 void
2281 init_charset (void)
2282 {
2283 Lisp_Object tempdir;
2284 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2285 if (access ((char *) SDATA (tempdir), 0) < 0)
2286 {
2287 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2288 Emacs will not function correctly without the character map files.\n\
2289 Please check your installation!\n",
2290 tempdir);
2291 /* TODO should this be a fatal error? (Bug#909) */
2292 }
2293
2294 Vcharset_map_path = Fcons (tempdir, Qnil);
2295 }
2296
2297
2298 void
2299 init_charset_once (void)
2300 {
2301 int i, j, k;
2302
2303 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2304 for (j = 0; j < ISO_MAX_CHARS; j++)
2305 for (k = 0; k < ISO_MAX_FINAL; k++)
2306 iso_charset_table[i][j][k] = -1;
2307
2308 for (i = 0; i < 256; i++)
2309 emacs_mule_charset[i] = NULL;
2310
2311 charset_jisx0201_roman = -1;
2312 charset_jisx0208_1978 = -1;
2313 charset_jisx0208 = -1;
2314 charset_ksc5601 = -1;
2315 }
2316
2317 #ifdef emacs
2318
2319 void
2320 syms_of_charset (void)
2321 {
2322 DEFSYM (Qcharsetp, "charsetp");
2323
2324 DEFSYM (Qascii, "ascii");
2325 DEFSYM (Qunicode, "unicode");
2326 DEFSYM (Qemacs, "emacs");
2327 DEFSYM (Qeight_bit, "eight-bit");
2328 DEFSYM (Qiso_8859_1, "iso-8859-1");
2329
2330 DEFSYM (Qgl, "gl");
2331 DEFSYM (Qgr, "gr");
2332
2333 staticpro (&Vcharset_ordered_list);
2334 Vcharset_ordered_list = Qnil;
2335
2336 staticpro (&Viso_2022_charset_list);
2337 Viso_2022_charset_list = Qnil;
2338
2339 staticpro (&Vemacs_mule_charset_list);
2340 Vemacs_mule_charset_list = Qnil;
2341
2342 /* Don't staticpro them here. It's done in syms_of_fns. */
2343 QCtest = intern (":test");
2344 Qeq = intern ("eq");
2345
2346 staticpro (&Vcharset_hash_table);
2347 {
2348 Lisp_Object args[2];
2349 args[0] = QCtest;
2350 args[1] = Qeq;
2351 Vcharset_hash_table = Fmake_hash_table (2, args);
2352 }
2353
2354 charset_table_size = 128;
2355 charset_table = ((struct charset *)
2356 xmalloc (sizeof (struct charset) * charset_table_size));
2357 charset_table_used = 0;
2358
2359 defsubr (&Scharsetp);
2360 defsubr (&Smap_charset_chars);
2361 defsubr (&Sdefine_charset_internal);
2362 defsubr (&Sdefine_charset_alias);
2363 defsubr (&Scharset_plist);
2364 defsubr (&Sset_charset_plist);
2365 defsubr (&Sunify_charset);
2366 defsubr (&Sget_unused_iso_final_char);
2367 defsubr (&Sdeclare_equiv_charset);
2368 defsubr (&Sfind_charset_region);
2369 defsubr (&Sfind_charset_string);
2370 defsubr (&Sdecode_char);
2371 defsubr (&Sencode_char);
2372 defsubr (&Ssplit_char);
2373 defsubr (&Smake_char);
2374 defsubr (&Schar_charset);
2375 defsubr (&Scharset_after);
2376 defsubr (&Siso_charset);
2377 defsubr (&Sclear_charset_maps);
2378 defsubr (&Scharset_priority_list);
2379 defsubr (&Sset_charset_priority);
2380 defsubr (&Scharset_id_internal);
2381
2382 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2383 doc: /* *List of directories to search for charset map files. */);
2384 Vcharset_map_path = Qnil;
2385
2386 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2387 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2388 inhibit_load_charset_map = 0;
2389
2390 DEFVAR_LISP ("charset-list", &Vcharset_list,
2391 doc: /* List of all charsets ever defined. */);
2392 Vcharset_list = Qnil;
2393
2394 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2395 doc: /* ISO639 language mnemonic symbol for the current language environment.
2396 If the current language environment is for multiple languages (e.g. "Latin-1"),
2397 the value may be a list of mnemonics. */);
2398 Vcurrent_iso639_language = Qnil;
2399
2400 charset_ascii
2401 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2402 0, 127, 'B', -1, 0, 1, 0, 0);
2403 charset_iso_8859_1
2404 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2405 0, 255, -1, -1, -1, 1, 0, 0);
2406 charset_unicode
2407 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2408 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2409 charset_emacs
2410 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2411 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2412 charset_eight_bit
2413 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2414 128, 255, -1, 0, -1, 0, 1,
2415 MAX_5_BYTE_CHAR + 1);
2416 charset_unibyte = charset_iso_8859_1;
2417 }
2418
2419 #endif /* emacs */
2420
2421 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2422 (do not change this comment) */