(map_charset_chars): For a charset of `superset'
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
78
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
85
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
103
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
108
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
111
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
114
115 struct charset *emacs_mule_charset[256];
116
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121 Lisp_Object Vcharset_map_path;
122
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map;
125
126 Lisp_Object Vcurrent_iso639_language;
127
128 /* Defined in chartab.c */
129 extern void
130 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
151
152
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
169
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
172
173 static struct
174 {
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203 } *temp_charset_work;
204
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
227 \f
228
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded;
232
233 struct charset_map_entries
234 {
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240 };
241
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
251
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
257
258 If CONTROL_FLAG is 2, setup the following tables.
259
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265 */
266
267 static void
268 load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
273 {
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
280
281 if (n_entries <= 0)
282 return;
283
284 if (control_flag)
285 {
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
293
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
333 charset_map_loaded = 1;
334 }
335
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
339 {
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
344
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
352 {
353 to_index = from_index;
354 to_c = from_c;
355 }
356 else
357 {
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
360 }
361 if (from_index < 0 || to_index < 0)
362 continue;
363
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
368
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
418
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
421 }
422 }
423
424 if (control_flag == 0)
425 {
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
429 }
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
435 }
436
437
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
440
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
445 {
446 int c;
447 unsigned n;
448
449 while ((c = getc (fp)) != EOF)
450 {
451 if (c == '#')
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
460 }
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
478 }
479
480 extern Lisp_Object Qfile_name_handler_alist;
481
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
484 0xAAAA 0xCCCC
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
487 0xAAAA-0xBBBB 0xCCCC
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
490
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
494 range.
495
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
498
499 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
500
501 static void
502 load_charset_map_from_file (charset, mapfile, control_flag)
503 struct charset *charset;
504 Lisp_Object mapfile;
505 int control_flag;
506 {
507 unsigned min_code = CHARSET_MIN_CODE (charset);
508 unsigned max_code = CHARSET_MAX_CODE (charset);
509 int fd;
510 FILE *fp;
511 int eof;
512 Lisp_Object suffixes;
513 struct charset_map_entries *head, *entries;
514 int n_entries;
515 int count = SPECPDL_INDEX ();
516
517 suffixes = Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil));
519
520 specbind (Qfile_name_handler_alist, Qnil);
521 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
522 unbind_to (count, Qnil);
523 if (fd < 0
524 || ! (fp = fdopen (fd, "r")))
525 error ("Failure in loading charset map: %S", SDATA (mapfile));
526
527 head = entries = ((struct charset_map_entries *)
528 alloca (sizeof (struct charset_map_entries)));
529 n_entries = 0;
530 eof = 0;
531 while (1)
532 {
533 unsigned from, to;
534 int c;
535 int idx;
536
537 from = read_hex (fp, &eof);
538 if (eof)
539 break;
540 if (getc (fp) == '-')
541 to = read_hex (fp, &eof);
542 else
543 to = from;
544 c = (int) read_hex (fp, &eof);
545
546 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
547 continue;
548
549 if (n_entries > 0 && (n_entries % 0x10000) == 0)
550 {
551 entries->next = ((struct charset_map_entries *)
552 alloca (sizeof (struct charset_map_entries)));
553 entries = entries->next;
554 }
555 idx = n_entries % 0x10000;
556 entries->entry[idx].from = from;
557 entries->entry[idx].to = to;
558 entries->entry[idx].c = c;
559 n_entries++;
560 }
561 fclose (fp);
562 close (fd);
563
564 load_charset_map (charset, head, n_entries, control_flag);
565 }
566
567 static void
568 load_charset_map_from_vector (charset, vec, control_flag)
569 struct charset *charset;
570 Lisp_Object vec;
571 int control_flag;
572 {
573 unsigned min_code = CHARSET_MIN_CODE (charset);
574 unsigned max_code = CHARSET_MAX_CODE (charset);
575 struct charset_map_entries *head, *entries;
576 int n_entries;
577 int len = ASIZE (vec);
578 int i;
579
580 if (len % 2 == 1)
581 {
582 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
583 return;
584 }
585
586 head = entries = ((struct charset_map_entries *)
587 alloca (sizeof (struct charset_map_entries)));
588 n_entries = 0;
589 for (i = 0; i < len; i += 2)
590 {
591 Lisp_Object val, val2;
592 unsigned from, to;
593 int c;
594 int idx;
595
596 val = AREF (vec, i);
597 if (CONSP (val))
598 {
599 val2 = XCDR (val);
600 val = XCAR (val);
601 CHECK_NATNUM (val);
602 CHECK_NATNUM (val2);
603 from = XFASTINT (val);
604 to = XFASTINT (val2);
605 }
606 else
607 {
608 CHECK_NATNUM (val);
609 from = to = XFASTINT (val);
610 }
611 val = AREF (vec, i + 1);
612 CHECK_NATNUM (val);
613 c = XFASTINT (val);
614
615 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
616 continue;
617
618 if (n_entries > 0 && (n_entries % 0x10000) == 0)
619 {
620 entries->next = ((struct charset_map_entries *)
621 alloca (sizeof (struct charset_map_entries)));
622 entries = entries->next;
623 }
624 idx = n_entries % 0x10000;
625 entries->entry[idx].from = from;
626 entries->entry[idx].to = to;
627 entries->entry[idx].c = c;
628 n_entries++;
629 }
630
631 load_charset_map (charset, head, n_entries, control_flag);
632 }
633
634
635 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
637
638 static void
639 load_charset (charset, control_flag)
640 struct charset *charset;
641 int control_flag;
642 {
643 Lisp_Object map;
644
645 if (inhibit_load_charset_map
646 && temp_charset_work
647 && charset == temp_charset_work->current
648 && (control_flag == 2 == temp_charset_work->for_encoder))
649 return;
650
651 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
652 map = CHARSET_MAP (charset);
653 else if (CHARSET_UNIFIED_P (charset))
654 map = CHARSET_UNIFY_MAP (charset);
655 if (STRINGP (map))
656 load_charset_map_from_file (charset, map, control_flag);
657 else
658 load_charset_map_from_vector (charset, map, control_flag);
659 }
660
661
662 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
663 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
664 (object)
665 Lisp_Object object;
666 {
667 return (CHARSETP (object) ? Qt : Qnil);
668 }
669
670
671 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
672 Lisp_Object function, Lisp_Object arg,
673 unsigned from, unsigned to));
674
675 void
676 map_charset_for_dump (c_function, function, arg, from, to)
677 void (*c_function) (Lisp_Object, Lisp_Object);
678 Lisp_Object function, arg;
679 unsigned from, to;
680 {
681 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
682 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
683 Lisp_Object range;
684 int c, stop;
685 struct gcpro gcpro1;
686
687 range = Fcons (Qnil, Qnil);
688 GCPRO1 (range);
689
690 c = temp_charset_work->min_char;
691 stop = (temp_charset_work->max_char < 0x20000
692 ? temp_charset_work->max_char : 0xFFFF);
693
694 while (1)
695 {
696 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
697
698 if (index >= from_idx && index <= to_idx)
699 {
700 if (NILP (XCAR (range)))
701 XSETCAR (range, make_number (c));
702 }
703 else if (! NILP (XCAR (range)))
704 {
705 XSETCDR (range, make_number (c - 1));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 XSETCAR (range, Qnil);
711 }
712 if (c == stop)
713 {
714 if (c == temp_charset_work->max_char)
715 {
716 if (! NILP (XCAR (range)))
717 {
718 XSETCDR (range, make_number (c));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
723 }
724 break;
725 }
726 c = 0x1FFFF;
727 stop = temp_charset_work->max_char;
728 }
729 c++;
730 }
731 UNGCPRO;
732 }
733
734 void
735 map_charset_chars (c_function, function, arg,
736 charset, from, to)
737 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
738 Lisp_Object function, arg;
739 struct charset *charset;
740 unsigned from, to;
741 {
742 Lisp_Object range;
743 int partial;
744
745 partial = (from > CHARSET_MIN_CODE (charset)
746 || to < CHARSET_MAX_CODE (charset));
747
748 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
749 {
750 int from_idx = CODE_POINT_TO_INDEX (charset, from);
751 int to_idx = CODE_POINT_TO_INDEX (charset, to);
752 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
753 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
754
755 if (CHARSET_UNIFIED_P (charset))
756 {
757 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
758 load_charset (charset, 2);
759 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
760 map_char_table_for_charset (c_function, function,
761 CHARSET_DEUNIFIER (charset), arg,
762 partial ? charset : NULL, from, to);
763 else
764 map_charset_for_dump (c_function, function, arg, from, to);
765 }
766
767 range = Fcons (make_number (from_c), make_number (to_c));
768 if (NILP (function))
769 (*c_function) (arg, range);
770 else
771 call2 (function, range, arg);
772 }
773 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
774 {
775 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
776 load_charset (charset, 2);
777 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
778 map_char_table_for_charset (c_function, function,
779 CHARSET_ENCODER (charset), arg,
780 partial ? charset : NULL, from, to);
781 else
782 map_charset_for_dump (c_function, function, arg, from, to);
783 }
784 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
785 {
786 Lisp_Object subset_info;
787 int offset;
788
789 subset_info = CHARSET_SUBSET (charset);
790 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
791 offset = XINT (AREF (subset_info, 3));
792 from -= offset;
793 if (from < XFASTINT (AREF (subset_info, 1)))
794 from = XFASTINT (AREF (subset_info, 1));
795 to -= offset;
796 if (to > XFASTINT (AREF (subset_info, 2)))
797 to = XFASTINT (AREF (subset_info, 2));
798 map_charset_chars (c_function, function, arg, charset, from, to);
799 }
800 else /* i.e. CHARSET_METHOD_SUPERSET */
801 {
802 Lisp_Object parents;
803
804 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
805 parents = XCDR (parents))
806 {
807 int offset;
808 unsigned this_from, this_to;
809
810 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
811 offset = XINT (XCDR (XCAR (parents)));
812 this_from = from > offset ? from - offset : 0;
813 this_to = to > offset ? to - offset : 0;
814 if (this_from < CHARSET_MIN_CODE (charset))
815 this_from = CHARSET_MIN_CODE (charset);
816 if (this_to > CHARSET_MAX_CODE (charset))
817 this_to = CHARSET_MAX_CODE (charset);
818 map_charset_chars (c_function, function, arg, charset,
819 this_from, this_to);
820 }
821 }
822 }
823
824 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
825 doc: /* Call FUNCTION for all characters in CHARSET.
826 FUNCTION is called with an argument RANGE and the optional 3rd
827 argument ARG.
828
829 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
830 characters contained in CHARSET.
831
832 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
833 range of code points of target characters. */)
834 (function, charset, arg, from_code, to_code)
835 Lisp_Object function, charset, arg, from_code, to_code;
836 {
837 struct charset *cs;
838 unsigned from, to;
839
840 CHECK_CHARSET_GET_CHARSET (charset, cs);
841 if (NILP (from_code))
842 from = CHARSET_MIN_CODE (cs);
843 else
844 {
845 CHECK_NATNUM (from_code);
846 from = XINT (from_code);
847 if (from < CHARSET_MIN_CODE (cs))
848 from = CHARSET_MIN_CODE (cs);
849 }
850 if (NILP (to_code))
851 to = CHARSET_MAX_CODE (cs);
852 else
853 {
854 CHECK_NATNUM (to_code);
855 to = XINT (to_code);
856 if (to > CHARSET_MAX_CODE (cs))
857 to = CHARSET_MAX_CODE (cs);
858 }
859 map_charset_chars (NULL, function, arg, cs, from, to);
860 return Qnil;
861 }
862
863
864 /* Define a charset according to the arguments. The Nth argument is
865 the Nth attribute of the charset (the last attribute `charset-id'
866 is not included). See the docstring of `define-charset' for the
867 detail. */
868
869 DEFUN ("define-charset-internal", Fdefine_charset_internal,
870 Sdefine_charset_internal, charset_arg_max, MANY, 0,
871 doc: /* For internal use only.
872 usage: (define-charset-internal ...) */)
873 (nargs, args)
874 int nargs;
875 Lisp_Object *args;
876 {
877 /* Charset attr vector. */
878 Lisp_Object attrs;
879 Lisp_Object val;
880 unsigned hash_code;
881 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
882 int i, j;
883 struct charset charset;
884 int id;
885 int dimension;
886 int new_definition_p;
887 int nchars;
888
889 if (nargs != charset_arg_max)
890 return Fsignal (Qwrong_number_of_arguments,
891 Fcons (intern ("define-charset-internal"),
892 make_number (nargs)));
893
894 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
895
896 CHECK_SYMBOL (args[charset_arg_name]);
897 ASET (attrs, charset_name, args[charset_arg_name]);
898
899 val = args[charset_arg_code_space];
900 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
901 {
902 int min_byte, max_byte;
903
904 min_byte = XINT (Faref (val, make_number (i * 2)));
905 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
906 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
907 error ("Invalid :code-space value");
908 charset.code_space[i * 4] = min_byte;
909 charset.code_space[i * 4 + 1] = max_byte;
910 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
911 nchars *= charset.code_space[i * 4 + 2];
912 charset.code_space[i * 4 + 3] = nchars;
913 if (max_byte > 0)
914 dimension = i + 1;
915 }
916
917 val = args[charset_arg_dimension];
918 if (NILP (val))
919 charset.dimension = dimension;
920 else
921 {
922 CHECK_NATNUM (val);
923 charset.dimension = XINT (val);
924 if (charset.dimension < 1 || charset.dimension > 4)
925 args_out_of_range_3 (val, make_number (1), make_number (4));
926 }
927
928 charset.code_linear_p
929 = (charset.dimension == 1
930 || (charset.code_space[2] == 256
931 && (charset.dimension == 2
932 || (charset.code_space[6] == 256
933 && (charset.dimension == 3
934 || charset.code_space[10] == 256)))));
935
936 if (! charset.code_linear_p)
937 {
938 charset.code_space_mask = (unsigned char *) xmalloc (256);
939 bzero (charset.code_space_mask, 256);
940 for (i = 0; i < 4; i++)
941 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
942 j++)
943 charset.code_space_mask[j] |= (1 << i);
944 }
945
946 charset.iso_chars_96 = charset.code_space[2] == 96;
947
948 charset.min_code = (charset.code_space[0]
949 | (charset.code_space[4] << 8)
950 | (charset.code_space[8] << 16)
951 | (charset.code_space[12] << 24));
952 charset.max_code = (charset.code_space[1]
953 | (charset.code_space[5] << 8)
954 | (charset.code_space[9] << 16)
955 | (charset.code_space[13] << 24));
956 charset.char_index_offset = 0;
957
958 val = args[charset_arg_min_code];
959 if (! NILP (val))
960 {
961 unsigned code;
962
963 if (INTEGERP (val))
964 code = XINT (val);
965 else
966 {
967 CHECK_CONS (val);
968 CHECK_NUMBER_CAR (val);
969 CHECK_NUMBER_CDR (val);
970 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
971 }
972 if (code < charset.min_code
973 || code > charset.max_code)
974 args_out_of_range_3 (make_number (charset.min_code),
975 make_number (charset.max_code), val);
976 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
977 charset.min_code = code;
978 }
979
980 val = args[charset_arg_max_code];
981 if (! NILP (val))
982 {
983 unsigned code;
984
985 if (INTEGERP (val))
986 code = XINT (val);
987 else
988 {
989 CHECK_CONS (val);
990 CHECK_NUMBER_CAR (val);
991 CHECK_NUMBER_CDR (val);
992 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
993 }
994 if (code < charset.min_code
995 || code > charset.max_code)
996 args_out_of_range_3 (make_number (charset.min_code),
997 make_number (charset.max_code), val);
998 charset.max_code = code;
999 }
1000
1001 charset.compact_codes_p = charset.max_code < 0x10000;
1002
1003 val = args[charset_arg_invalid_code];
1004 if (NILP (val))
1005 {
1006 if (charset.min_code > 0)
1007 charset.invalid_code = 0;
1008 else
1009 {
1010 XSETINT (val, charset.max_code + 1);
1011 if (XINT (val) == charset.max_code + 1)
1012 charset.invalid_code = charset.max_code + 1;
1013 else
1014 error ("Attribute :invalid-code must be specified");
1015 }
1016 }
1017 else
1018 {
1019 CHECK_NATNUM (val);
1020 charset.invalid_code = XFASTINT (val);
1021 }
1022
1023 val = args[charset_arg_iso_final];
1024 if (NILP (val))
1025 charset.iso_final = -1;
1026 else
1027 {
1028 CHECK_NUMBER (val);
1029 if (XINT (val) < '0' || XINT (val) > 127)
1030 error ("Invalid iso-final-char: %d", XINT (val));
1031 charset.iso_final = XINT (val);
1032 }
1033
1034 val = args[charset_arg_iso_revision];
1035 if (NILP (val))
1036 charset.iso_revision = -1;
1037 else
1038 {
1039 CHECK_NUMBER (val);
1040 if (XINT (val) > 63)
1041 args_out_of_range (make_number (63), val);
1042 charset.iso_revision = XINT (val);
1043 }
1044
1045 val = args[charset_arg_emacs_mule_id];
1046 if (NILP (val))
1047 charset.emacs_mule_id = -1;
1048 else
1049 {
1050 CHECK_NATNUM (val);
1051 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1052 error ("Invalid emacs-mule-id: %d", XINT (val));
1053 charset.emacs_mule_id = XINT (val);
1054 }
1055
1056 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1057
1058 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1059
1060 charset.unified_p = 0;
1061
1062 bzero (charset.fast_map, sizeof (charset.fast_map));
1063
1064 if (! NILP (args[charset_arg_code_offset]))
1065 {
1066 val = args[charset_arg_code_offset];
1067 CHECK_NUMBER (val);
1068
1069 charset.method = CHARSET_METHOD_OFFSET;
1070 charset.code_offset = XINT (val);
1071
1072 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1073 charset.min_char = i + charset.code_offset;
1074 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1075 charset.max_char = i + charset.code_offset;
1076 if (charset.max_char > MAX_CHAR)
1077 error ("Unsupported max char: %d", charset.max_char);
1078
1079 i = (charset.min_char >> 7) << 7;
1080 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1081 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1082 i = (i >> 12) << 12;
1083 for (; i <= charset.max_char; i += 0x1000)
1084 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1085 }
1086 else if (! NILP (args[charset_arg_map]))
1087 {
1088 val = args[charset_arg_map];
1089 ASET (attrs, charset_map, val);
1090 charset.method = CHARSET_METHOD_MAP;
1091 }
1092 else if (! NILP (args[charset_arg_subset]))
1093 {
1094 Lisp_Object parent;
1095 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1096 struct charset *parent_charset;
1097
1098 val = args[charset_arg_subset];
1099 parent = Fcar (val);
1100 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1101 parent_min_code = Fnth (make_number (1), val);
1102 CHECK_NATNUM (parent_min_code);
1103 parent_max_code = Fnth (make_number (2), val);
1104 CHECK_NATNUM (parent_max_code);
1105 parent_code_offset = Fnth (make_number (3), val);
1106 CHECK_NUMBER (parent_code_offset);
1107 val = Fmake_vector (make_number (4), Qnil);
1108 ASET (val, 0, make_number (parent_charset->id));
1109 ASET (val, 1, parent_min_code);
1110 ASET (val, 2, parent_max_code);
1111 ASET (val, 3, parent_code_offset);
1112 ASET (attrs, charset_subset, val);
1113
1114 charset.method = CHARSET_METHOD_SUBSET;
1115 /* Here, we just copy the parent's fast_map. It's not accurate,
1116 but at least it works for quickly detecting which character
1117 DOESN'T belong to this charset. */
1118 for (i = 0; i < 190; i++)
1119 charset.fast_map[i] = parent_charset->fast_map[i];
1120
1121 /* We also copy these for parents. */
1122 charset.min_char = parent_charset->min_char;
1123 charset.max_char = parent_charset->max_char;
1124 }
1125 else if (! NILP (args[charset_arg_superset]))
1126 {
1127 val = args[charset_arg_superset];
1128 charset.method = CHARSET_METHOD_SUPERSET;
1129 val = Fcopy_sequence (val);
1130 ASET (attrs, charset_superset, val);
1131
1132 charset.min_char = MAX_CHAR;
1133 charset.max_char = 0;
1134 for (; ! NILP (val); val = Fcdr (val))
1135 {
1136 Lisp_Object elt, car_part, cdr_part;
1137 int this_id, offset;
1138 struct charset *this_charset;
1139
1140 elt = Fcar (val);
1141 if (CONSP (elt))
1142 {
1143 car_part = XCAR (elt);
1144 cdr_part = XCDR (elt);
1145 CHECK_CHARSET_GET_ID (car_part, this_id);
1146 CHECK_NUMBER (cdr_part);
1147 offset = XINT (cdr_part);
1148 }
1149 else
1150 {
1151 CHECK_CHARSET_GET_ID (elt, this_id);
1152 offset = 0;
1153 }
1154 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1155
1156 this_charset = CHARSET_FROM_ID (this_id);
1157 if (charset.min_char > this_charset->min_char)
1158 charset.min_char = this_charset->min_char;
1159 if (charset.max_char < this_charset->max_char)
1160 charset.max_char = this_charset->max_char;
1161 for (i = 0; i < 190; i++)
1162 charset.fast_map[i] |= this_charset->fast_map[i];
1163 }
1164 }
1165 else
1166 error ("None of :code-offset, :map, :parents are specified");
1167
1168 val = args[charset_arg_unify_map];
1169 if (! NILP (val) && !STRINGP (val))
1170 CHECK_VECTOR (val);
1171 ASET (attrs, charset_unify_map, val);
1172
1173 CHECK_LIST (args[charset_arg_plist]);
1174 ASET (attrs, charset_plist, args[charset_arg_plist]);
1175
1176 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1177 &hash_code);
1178 if (charset.hash_index >= 0)
1179 {
1180 new_definition_p = 0;
1181 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1182 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1183 }
1184 else
1185 {
1186 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1187 hash_code);
1188 if (charset_table_used == charset_table_size)
1189 {
1190 struct charset *new_table
1191 = (struct charset *) xmalloc (sizeof (struct charset)
1192 * (charset_table_size + 16));
1193 bcopy (charset_table, new_table,
1194 sizeof (struct charset) * charset_table_size);
1195 charset_table_size += 16;
1196 charset_table = new_table;
1197 }
1198 id = charset_table_used++;
1199 new_definition_p = 1;
1200 }
1201
1202 ASET (attrs, charset_id, make_number (id));
1203 charset.id = id;
1204 charset_table[id] = charset;
1205
1206 if (charset.method == CHARSET_METHOD_MAP)
1207 {
1208 load_charset (&charset, 0);
1209 charset_table[id] = charset;
1210 }
1211
1212 if (charset.iso_final >= 0)
1213 {
1214 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1215 charset.iso_final) = id;
1216 if (new_definition_p)
1217 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1218 Fcons (make_number (id), Qnil));
1219 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1220 charset_jisx0201_roman = id;
1221 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1222 charset_jisx0208_1978 = id;
1223 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1224 charset_jisx0208 = id;
1225 }
1226
1227 if (charset.emacs_mule_id >= 0)
1228 {
1229 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1230 if (charset.emacs_mule_id < 0xA0)
1231 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1232 else
1233 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1234 if (new_definition_p)
1235 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1236 Fcons (make_number (id), Qnil));
1237 }
1238
1239 if (new_definition_p)
1240 {
1241 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1242 if (charset.supplementary_p)
1243 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1244 Fcons (make_number (id), Qnil));
1245 else
1246 {
1247 Lisp_Object tail;
1248
1249 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1250 {
1251 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1252
1253 if (cs->supplementary_p)
1254 break;
1255 }
1256 if (EQ (tail, Vcharset_ordered_list))
1257 Vcharset_ordered_list = Fcons (make_number (id),
1258 Vcharset_ordered_list);
1259 else if (NILP (tail))
1260 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1261 Fcons (make_number (id), Qnil));
1262 else
1263 {
1264 val = Fcons (XCAR (tail), XCDR (tail));
1265 XSETCDR (tail, val);
1266 XSETCAR (tail, make_number (id));
1267 }
1268 }
1269 charset_ordered_list_tick++;
1270 }
1271
1272 return Qnil;
1273 }
1274
1275
1276 /* Same as Fdefine_charset_internal but arguments are more convenient
1277 to call from C (typically in syms_of_charset). This can define a
1278 charset of `offset' method only. Return the ID of the new
1279 charset. */
1280
1281 static int
1282 define_charset_internal (name, dimension, code_space, min_code, max_code,
1283 iso_final, iso_revision, emacs_mule_id,
1284 ascii_compatible, supplementary,
1285 code_offset)
1286 Lisp_Object name;
1287 int dimension;
1288 unsigned char *code_space;
1289 unsigned min_code, max_code;
1290 int iso_final, iso_revision, emacs_mule_id;
1291 int ascii_compatible, supplementary;
1292 int code_offset;
1293 {
1294 Lisp_Object args[charset_arg_max];
1295 Lisp_Object plist[14];
1296 Lisp_Object val;
1297 int i;
1298
1299 args[charset_arg_name] = name;
1300 args[charset_arg_dimension] = make_number (dimension);
1301 val = Fmake_vector (make_number (8), make_number (0));
1302 for (i = 0; i < 8; i++)
1303 ASET (val, i, make_number (code_space[i]));
1304 args[charset_arg_code_space] = val;
1305 args[charset_arg_min_code] = make_number (min_code);
1306 args[charset_arg_max_code] = make_number (max_code);
1307 args[charset_arg_iso_final]
1308 = (iso_final < 0 ? Qnil : make_number (iso_final));
1309 args[charset_arg_iso_revision] = make_number (iso_revision);
1310 args[charset_arg_emacs_mule_id]
1311 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1312 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1313 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1314 args[charset_arg_invalid_code] = Qnil;
1315 args[charset_arg_code_offset] = make_number (code_offset);
1316 args[charset_arg_map] = Qnil;
1317 args[charset_arg_subset] = Qnil;
1318 args[charset_arg_superset] = Qnil;
1319 args[charset_arg_unify_map] = Qnil;
1320
1321 plist[0] = intern (":name");
1322 plist[1] = args[charset_arg_name];
1323 plist[2] = intern (":dimension");
1324 plist[3] = args[charset_arg_dimension];
1325 plist[4] = intern (":code-space");
1326 plist[5] = args[charset_arg_code_space];
1327 plist[6] = intern (":iso-final-char");
1328 plist[7] = args[charset_arg_iso_final];
1329 plist[8] = intern (":emacs-mule-id");
1330 plist[9] = args[charset_arg_emacs_mule_id];
1331 plist[10] = intern (":ascii-compatible-p");
1332 plist[11] = args[charset_arg_ascii_compatible_p];
1333 plist[12] = intern (":code-offset");
1334 plist[13] = args[charset_arg_code_offset];
1335
1336 args[charset_arg_plist] = Flist (14, plist);
1337 Fdefine_charset_internal (charset_arg_max, args);
1338
1339 return XINT (CHARSET_SYMBOL_ID (name));
1340 }
1341
1342
1343 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1344 Sdefine_charset_alias, 2, 2, 0,
1345 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1346 (alias, charset)
1347 Lisp_Object alias, charset;
1348 {
1349 Lisp_Object attr;
1350
1351 CHECK_CHARSET_GET_ATTR (charset, attr);
1352 Fputhash (alias, attr, Vcharset_hash_table);
1353 Vcharset_list = Fcons (alias, Vcharset_list);
1354 return Qnil;
1355 }
1356
1357
1358 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1359 doc: /* Return the property list of CHARSET. */)
1360 (charset)
1361 Lisp_Object charset;
1362 {
1363 Lisp_Object attrs;
1364
1365 CHECK_CHARSET_GET_ATTR (charset, attrs);
1366 return CHARSET_ATTR_PLIST (attrs);
1367 }
1368
1369
1370 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1371 doc: /* Set CHARSET's property list to PLIST. */)
1372 (charset, plist)
1373 Lisp_Object charset, plist;
1374 {
1375 Lisp_Object attrs;
1376
1377 CHECK_CHARSET_GET_ATTR (charset, attrs);
1378 CHARSET_ATTR_PLIST (attrs) = plist;
1379 return plist;
1380 }
1381
1382
1383 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1384 doc: /* Unify characters of CHARSET with Unicode.
1385 This means reading the relevant file and installing the table defined
1386 by CHARSET's `:unify-map' property.
1387
1388 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1389 the same meaning as the `:unify-map' attribute in the function
1390 `define-charset' (which see).
1391
1392 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1393 (charset, unify_map, deunify)
1394 Lisp_Object charset, unify_map, deunify;
1395 {
1396 int id;
1397 struct charset *cs;
1398
1399 CHECK_CHARSET_GET_ID (charset, id);
1400 cs = CHARSET_FROM_ID (id);
1401 if (NILP (deunify)
1402 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1403 : ! CHARSET_UNIFIED_P (cs))
1404 return Qnil;
1405
1406 CHARSET_UNIFIED_P (cs) = 0;
1407 if (NILP (deunify))
1408 {
1409 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1410 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1411 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1412 if (NILP (unify_map))
1413 unify_map = CHARSET_UNIFY_MAP (cs);
1414 else
1415 {
1416 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1417 signal_error ("Bad unify-map", unify_map);
1418 CHARSET_UNIFY_MAP (cs) = unify_map;
1419 }
1420 if (NILP (Vchar_unify_table))
1421 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1422 char_table_set_range (Vchar_unify_table,
1423 cs->min_char, cs->max_char, charset);
1424 CHARSET_UNIFIED_P (cs) = 1;
1425 }
1426 else if (CHAR_TABLE_P (Vchar_unify_table))
1427 {
1428 int min_code = CHARSET_MIN_CODE (cs);
1429 int max_code = CHARSET_MAX_CODE (cs);
1430 int min_char = DECODE_CHAR (cs, min_code);
1431 int max_char = DECODE_CHAR (cs, max_code);
1432
1433 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1434 }
1435
1436 return Qnil;
1437 }
1438
1439 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1440 Sget_unused_iso_final_char, 2, 2, 0,
1441 doc: /*
1442 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1443 DIMENSION is the number of bytes to represent a character: 1 or 2.
1444 CHARS is the number of characters in a dimension: 94 or 96.
1445
1446 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1447 If there's no unused final char for the specified kind of charset,
1448 return nil. */)
1449 (dimension, chars)
1450 Lisp_Object dimension, chars;
1451 {
1452 int final_char;
1453
1454 CHECK_NUMBER (dimension);
1455 CHECK_NUMBER (chars);
1456 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1457 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1458 if (XINT (chars) != 94 && XINT (chars) != 96)
1459 args_out_of_range_3 (chars, make_number (94), make_number (96));
1460 for (final_char = '0'; final_char <= '?'; final_char++)
1461 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1462 break;
1463 return (final_char <= '?' ? make_number (final_char) : Qnil);
1464 }
1465
1466 static void
1467 check_iso_charset_parameter (dimension, chars, final_char)
1468 Lisp_Object dimension, chars, final_char;
1469 {
1470 CHECK_NATNUM (dimension);
1471 CHECK_NATNUM (chars);
1472 CHECK_NATNUM (final_char);
1473
1474 if (XINT (dimension) > 3)
1475 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1476 if (XINT (chars) != 94 && XINT (chars) != 96)
1477 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1478 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1479 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1480 }
1481
1482
1483 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1484 4, 4, 0,
1485 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1486
1487 On decoding by an ISO-2022 base coding system, when a charset
1488 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1489 if CHARSET is designated instead. */)
1490 (dimension, chars, final_char, charset)
1491 Lisp_Object dimension, chars, final_char, charset;
1492 {
1493 int id;
1494 int chars_flag;
1495
1496 CHECK_CHARSET_GET_ID (charset, id);
1497 check_iso_charset_parameter (dimension, chars, final_char);
1498 chars_flag = XINT (chars) == 96;
1499 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1500 return Qnil;
1501 }
1502
1503
1504 /* Return information about charsets in the text at PTR of NBYTES
1505 bytes, which are NCHARS characters. The value is:
1506
1507 0: Each character is represented by one byte. This is always
1508 true for a unibyte string. For a multibyte string, true if
1509 it contains only ASCII characters.
1510
1511 1: No charsets other than ascii, control-1, and latin-1 are
1512 found.
1513
1514 2: Otherwise.
1515 */
1516
1517 int
1518 string_xstring_p (string)
1519 Lisp_Object string;
1520 {
1521 const unsigned char *p = SDATA (string);
1522 const unsigned char *endp = p + SBYTES (string);
1523
1524 if (SCHARS (string) == SBYTES (string))
1525 return 0;
1526
1527 while (p < endp)
1528 {
1529 int c = STRING_CHAR_ADVANCE (p);
1530
1531 if (c >= 0x100)
1532 return 2;
1533 }
1534 return 1;
1535 }
1536
1537
1538 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1539
1540 CHARSETS is a vector. If Nth element is non-nil, it means the
1541 charset whose id is N is already found.
1542
1543 It may lookup a translation table TABLE if supplied. */
1544
1545 static void
1546 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1547 const unsigned char *ptr;
1548 EMACS_INT nchars, nbytes;
1549 Lisp_Object charsets, table;
1550 int multibyte;
1551 {
1552 const unsigned char *pend = ptr + nbytes;
1553
1554 if (nchars == nbytes)
1555 {
1556 if (multibyte)
1557 ASET (charsets, charset_ascii, Qt);
1558 else
1559 while (ptr < pend)
1560 {
1561 int c = *ptr++;
1562
1563 if (!NILP (table))
1564 c = translate_char (table, c);
1565 if (ASCII_BYTE_P (c))
1566 ASET (charsets, charset_ascii, Qt);
1567 else
1568 ASET (charsets, charset_eight_bit, Qt);
1569 }
1570 }
1571 else
1572 {
1573 while (ptr < pend)
1574 {
1575 int c = STRING_CHAR_ADVANCE (ptr);
1576 struct charset *charset;
1577
1578 if (!NILP (table))
1579 c = translate_char (table, c);
1580 charset = CHAR_CHARSET (c);
1581 ASET (charsets, CHARSET_ID (charset), Qt);
1582 }
1583 }
1584 }
1585
1586 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1587 2, 3, 0,
1588 doc: /* Return a list of charsets in the region between BEG and END.
1589 BEG and END are buffer positions.
1590 Optional arg TABLE if non-nil is a translation table to look up.
1591
1592 If the current buffer is unibyte, the returned list may contain
1593 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1594 (beg, end, table)
1595 Lisp_Object beg, end, table;
1596 {
1597 Lisp_Object charsets;
1598 EMACS_INT from, from_byte, to, stop, stop_byte;
1599 int i;
1600 Lisp_Object val;
1601 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1602
1603 validate_region (&beg, &end);
1604 from = XFASTINT (beg);
1605 stop = to = XFASTINT (end);
1606
1607 if (from < GPT && GPT < to)
1608 {
1609 stop = GPT;
1610 stop_byte = GPT_BYTE;
1611 }
1612 else
1613 stop_byte = CHAR_TO_BYTE (stop);
1614
1615 from_byte = CHAR_TO_BYTE (from);
1616
1617 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1618 while (1)
1619 {
1620 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1621 stop_byte - from_byte, charsets, table,
1622 multibyte);
1623 if (stop < to)
1624 {
1625 from = stop, from_byte = stop_byte;
1626 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1627 }
1628 else
1629 break;
1630 }
1631
1632 val = Qnil;
1633 for (i = charset_table_used - 1; i >= 0; i--)
1634 if (!NILP (AREF (charsets, i)))
1635 val = Fcons (CHARSET_NAME (charset_table + i), val);
1636 return val;
1637 }
1638
1639 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1640 1, 2, 0,
1641 doc: /* Return a list of charsets in STR.
1642 Optional arg TABLE if non-nil is a translation table to look up.
1643
1644 If STR is unibyte, the returned list may contain
1645 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1646 (str, table)
1647 Lisp_Object str, table;
1648 {
1649 Lisp_Object charsets;
1650 int i;
1651 Lisp_Object val;
1652
1653 CHECK_STRING (str);
1654
1655 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1656 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1657 charsets, table,
1658 STRING_MULTIBYTE (str));
1659 val = Qnil;
1660 for (i = charset_table_used - 1; i >= 0; i--)
1661 if (!NILP (AREF (charsets, i)))
1662 val = Fcons (CHARSET_NAME (charset_table + i), val);
1663 return val;
1664 }
1665
1666 \f
1667
1668 /* Return a unified character code for C (>= 0x110000). VAL is a
1669 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1670 charset symbol. */
1671 int
1672 maybe_unify_char (c, val)
1673 int c;
1674 Lisp_Object val;
1675 {
1676 struct charset *charset;
1677
1678 if (INTEGERP (val))
1679 return XINT (val);
1680 if (NILP (val))
1681 return c;
1682
1683 CHECK_CHARSET_GET_CHARSET (val, charset);
1684 load_charset (charset, 1);
1685 if (! inhibit_load_charset_map)
1686 {
1687 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1688 if (! NILP (val))
1689 c = XINT (val);
1690 }
1691 else
1692 {
1693 int code_index = c - CHARSET_CODE_OFFSET (charset);
1694 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1695
1696 if (unified > 0)
1697 c = unified;
1698 }
1699 return c;
1700 }
1701
1702
1703 /* Return a character correponding to the code-point CODE of
1704 CHARSET. */
1705
1706 int
1707 decode_char (charset, code)
1708 struct charset *charset;
1709 unsigned code;
1710 {
1711 int c, char_index;
1712 enum charset_method method = CHARSET_METHOD (charset);
1713
1714 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1715 return -1;
1716
1717 if (method == CHARSET_METHOD_SUBSET)
1718 {
1719 Lisp_Object subset_info;
1720
1721 subset_info = CHARSET_SUBSET (charset);
1722 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1723 code -= XINT (AREF (subset_info, 3));
1724 if (code < XFASTINT (AREF (subset_info, 1))
1725 || code > XFASTINT (AREF (subset_info, 2)))
1726 c = -1;
1727 else
1728 c = DECODE_CHAR (charset, code);
1729 }
1730 else if (method == CHARSET_METHOD_SUPERSET)
1731 {
1732 Lisp_Object parents;
1733
1734 parents = CHARSET_SUPERSET (charset);
1735 c = -1;
1736 for (; CONSP (parents); parents = XCDR (parents))
1737 {
1738 int id = XINT (XCAR (XCAR (parents)));
1739 int code_offset = XINT (XCDR (XCAR (parents)));
1740 unsigned this_code = code - code_offset;
1741
1742 charset = CHARSET_FROM_ID (id);
1743 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1744 break;
1745 }
1746 }
1747 else
1748 {
1749 char_index = CODE_POINT_TO_INDEX (charset, code);
1750 if (char_index < 0)
1751 return -1;
1752
1753 if (method == CHARSET_METHOD_MAP)
1754 {
1755 Lisp_Object decoder;
1756
1757 decoder = CHARSET_DECODER (charset);
1758 if (! VECTORP (decoder))
1759 {
1760 load_charset (charset, 1);
1761 decoder = CHARSET_DECODER (charset);
1762 }
1763 if (VECTORP (decoder))
1764 c = XINT (AREF (decoder, char_index));
1765 else
1766 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1767 }
1768 else /* method == CHARSET_METHOD_OFFSET */
1769 {
1770 c = char_index + CHARSET_CODE_OFFSET (charset);
1771 if (CHARSET_UNIFIED_P (charset)
1772 && c > MAX_UNICODE_CHAR)
1773 MAYBE_UNIFY_CHAR (c);
1774 }
1775 }
1776
1777 return c;
1778 }
1779
1780 /* Variable used temporarily by the macro ENCODE_CHAR. */
1781 Lisp_Object charset_work;
1782
1783 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1784 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1785 use CHARSET's strict_max_char instead of max_char. */
1786
1787 unsigned
1788 encode_char (charset, c)
1789 struct charset *charset;
1790 int c;
1791 {
1792 unsigned code;
1793 enum charset_method method = CHARSET_METHOD (charset);
1794
1795 if (CHARSET_UNIFIED_P (charset))
1796 {
1797 Lisp_Object deunifier, deunified;
1798 int code_index = -1;
1799
1800 deunifier = CHARSET_DEUNIFIER (charset);
1801 if (! CHAR_TABLE_P (deunifier))
1802 {
1803 load_charset (charset, 2);
1804 deunifier = CHARSET_DEUNIFIER (charset);
1805 }
1806 if (CHAR_TABLE_P (deunifier))
1807 {
1808 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1809
1810 if (INTEGERP (deunified))
1811 code_index = XINT (deunified);
1812 }
1813 else
1814 {
1815 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1816 }
1817 if (code_index >= 0)
1818 c = CHARSET_CODE_OFFSET (charset) + code_index;
1819 }
1820
1821 if (method == CHARSET_METHOD_SUBSET)
1822 {
1823 Lisp_Object subset_info;
1824 struct charset *this_charset;
1825
1826 subset_info = CHARSET_SUBSET (charset);
1827 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1828 code = ENCODE_CHAR (this_charset, c);
1829 if (code == CHARSET_INVALID_CODE (this_charset)
1830 || code < XFASTINT (AREF (subset_info, 1))
1831 || code > XFASTINT (AREF (subset_info, 2)))
1832 return CHARSET_INVALID_CODE (charset);
1833 code += XINT (AREF (subset_info, 3));
1834 return code;
1835 }
1836
1837 if (method == CHARSET_METHOD_SUPERSET)
1838 {
1839 Lisp_Object parents;
1840
1841 parents = CHARSET_SUPERSET (charset);
1842 for (; CONSP (parents); parents = XCDR (parents))
1843 {
1844 int id = XINT (XCAR (XCAR (parents)));
1845 int code_offset = XINT (XCDR (XCAR (parents)));
1846 struct charset *this_charset = CHARSET_FROM_ID (id);
1847
1848 code = ENCODE_CHAR (this_charset, c);
1849 if (code != CHARSET_INVALID_CODE (this_charset))
1850 return code + code_offset;
1851 }
1852 return CHARSET_INVALID_CODE (charset);
1853 }
1854
1855 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1856 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1857 return CHARSET_INVALID_CODE (charset);
1858
1859 if (method == CHARSET_METHOD_MAP)
1860 {
1861 Lisp_Object encoder;
1862 Lisp_Object val;
1863
1864 encoder = CHARSET_ENCODER (charset);
1865 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1866 {
1867 load_charset (charset, 2);
1868 encoder = CHARSET_ENCODER (charset);
1869 }
1870 if (CHAR_TABLE_P (encoder))
1871 {
1872 val = CHAR_TABLE_REF (encoder, c);
1873 if (NILP (val))
1874 return CHARSET_INVALID_CODE (charset);
1875 code = XINT (val);
1876 if (! CHARSET_COMPACT_CODES_P (charset))
1877 code = INDEX_TO_CODE_POINT (charset, code);
1878 }
1879 else
1880 {
1881 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1882 code = INDEX_TO_CODE_POINT (charset, code);
1883 }
1884 }
1885 else /* method == CHARSET_METHOD_OFFSET */
1886 {
1887 int code_index = c - CHARSET_CODE_OFFSET (charset);
1888
1889 code = INDEX_TO_CODE_POINT (charset, code_index);
1890 }
1891
1892 return code;
1893 }
1894
1895
1896 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1897 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1898 Return nil if CODE-POINT is not valid in CHARSET.
1899
1900 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1901
1902 Optional argument RESTRICTION specifies a way to map the pair of CCS
1903 and CODE-POINT to a character. Currently not supported and just ignored. */)
1904 (charset, code_point, restriction)
1905 Lisp_Object charset, code_point, restriction;
1906 {
1907 int c, id;
1908 unsigned code;
1909 struct charset *charsetp;
1910
1911 CHECK_CHARSET_GET_ID (charset, id);
1912 if (CONSP (code_point))
1913 {
1914 CHECK_NATNUM_CAR (code_point);
1915 CHECK_NATNUM_CDR (code_point);
1916 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1917 }
1918 else
1919 {
1920 CHECK_NATNUM (code_point);
1921 code = XINT (code_point);
1922 }
1923 charsetp = CHARSET_FROM_ID (id);
1924 c = DECODE_CHAR (charsetp, code);
1925 return (c >= 0 ? make_number (c) : Qnil);
1926 }
1927
1928
1929 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1930 doc: /* Encode the character CH into a code-point of CHARSET.
1931 Return nil if CHARSET doesn't include CH.
1932
1933 Optional argument RESTRICTION specifies a way to map CH to a
1934 code-point in CCS. Currently not supported and just ignored. */)
1935 (ch, charset, restriction)
1936 Lisp_Object ch, charset, restriction;
1937 {
1938 int id;
1939 unsigned code;
1940 struct charset *charsetp;
1941
1942 CHECK_CHARSET_GET_ID (charset, id);
1943 CHECK_NATNUM (ch);
1944 charsetp = CHARSET_FROM_ID (id);
1945 code = ENCODE_CHAR (charsetp, XINT (ch));
1946 if (code == CHARSET_INVALID_CODE (charsetp))
1947 return Qnil;
1948 if (code > 0x7FFFFFF)
1949 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1950 return make_number (code);
1951 }
1952
1953
1954 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1955 doc:
1956 /* Return a character of CHARSET whose position codes are CODEn.
1957
1958 CODE1 through CODE4 are optional, but if you don't supply sufficient
1959 position codes, it is assumed that the minimum code in each dimension
1960 is specified. */)
1961 (charset, code1, code2, code3, code4)
1962 Lisp_Object charset, code1, code2, code3, code4;
1963 {
1964 int id, dimension;
1965 struct charset *charsetp;
1966 unsigned code;
1967 int c;
1968
1969 CHECK_CHARSET_GET_ID (charset, id);
1970 charsetp = CHARSET_FROM_ID (id);
1971
1972 dimension = CHARSET_DIMENSION (charsetp);
1973 if (NILP (code1))
1974 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1975 ? 0 : CHARSET_MIN_CODE (charsetp));
1976 else
1977 {
1978 CHECK_NATNUM (code1);
1979 if (XFASTINT (code1) >= 0x100)
1980 args_out_of_range (make_number (0xFF), code1);
1981 code = XFASTINT (code1);
1982
1983 if (dimension > 1)
1984 {
1985 code <<= 8;
1986 if (NILP (code2))
1987 code |= charsetp->code_space[(dimension - 2) * 4];
1988 else
1989 {
1990 CHECK_NATNUM (code2);
1991 if (XFASTINT (code2) >= 0x100)
1992 args_out_of_range (make_number (0xFF), code2);
1993 code |= XFASTINT (code2);
1994 }
1995
1996 if (dimension > 2)
1997 {
1998 code <<= 8;
1999 if (NILP (code3))
2000 code |= charsetp->code_space[(dimension - 3) * 4];
2001 else
2002 {
2003 CHECK_NATNUM (code3);
2004 if (XFASTINT (code3) >= 0x100)
2005 args_out_of_range (make_number (0xFF), code3);
2006 code |= XFASTINT (code3);
2007 }
2008
2009 if (dimension > 3)
2010 {
2011 code <<= 8;
2012 if (NILP (code4))
2013 code |= charsetp->code_space[0];
2014 else
2015 {
2016 CHECK_NATNUM (code4);
2017 if (XFASTINT (code4) >= 0x100)
2018 args_out_of_range (make_number (0xFF), code4);
2019 code |= XFASTINT (code4);
2020 }
2021 }
2022 }
2023 }
2024 }
2025
2026 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2027 code &= 0x7F7F7F7F;
2028 c = DECODE_CHAR (charsetp, code);
2029 if (c < 0)
2030 error ("Invalid code(s)");
2031 return make_number (c);
2032 }
2033
2034
2035 /* Return the first charset in CHARSET_LIST that contains C.
2036 CHARSET_LIST is a list of charset IDs. If it is nil, use
2037 Vcharset_ordered_list. */
2038
2039 struct charset *
2040 char_charset (c, charset_list, code_return)
2041 int c;
2042 Lisp_Object charset_list;
2043 unsigned *code_return;
2044 {
2045 int maybe_null = 0;
2046
2047 if (NILP (charset_list))
2048 charset_list = Vcharset_ordered_list;
2049 else
2050 maybe_null = 1;
2051
2052 while (CONSP (charset_list))
2053 {
2054 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2055 unsigned code = ENCODE_CHAR (charset, c);
2056
2057 if (code != CHARSET_INVALID_CODE (charset))
2058 {
2059 if (code_return)
2060 *code_return = code;
2061 return charset;
2062 }
2063 charset_list = XCDR (charset_list);
2064 if (c <= MAX_UNICODE_CHAR
2065 && EQ (charset_list, Vcharset_non_preferred_head))
2066 return CHARSET_FROM_ID (charset_unicode);
2067 }
2068 return (maybe_null ? NULL
2069 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2070 : CHARSET_FROM_ID (charset_eight_bit));
2071 }
2072
2073
2074 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2075 doc:
2076 /*Return list of charset and one to four position-codes of CH.
2077 The charset is decided by the current priority order of charsets.
2078 A position-code is a byte value of each dimension of the code-point of
2079 CH in the charset. */)
2080 (ch)
2081 Lisp_Object ch;
2082 {
2083 struct charset *charset;
2084 int c, dimension;
2085 unsigned code;
2086 Lisp_Object val;
2087
2088 CHECK_CHARACTER (ch);
2089 c = XFASTINT (ch);
2090 charset = CHAR_CHARSET (c);
2091 if (! charset)
2092 abort ();
2093 code = ENCODE_CHAR (charset, c);
2094 if (code == CHARSET_INVALID_CODE (charset))
2095 abort ();
2096 dimension = CHARSET_DIMENSION (charset);
2097 for (val = Qnil; dimension > 0; dimension--)
2098 {
2099 val = Fcons (make_number (code & 0xFF), val);
2100 code >>= 8;
2101 }
2102 return Fcons (CHARSET_NAME (charset), val);
2103 }
2104
2105
2106 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2107 doc: /* Return the charset of highest priority that contains CH.
2108 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2109 from which to find the charset. It may also be a coding system. In
2110 that case, find the charset from what supported by that coding system. */)
2111 (ch, restriction)
2112 Lisp_Object ch, restriction;
2113 {
2114 struct charset *charset;
2115
2116 CHECK_CHARACTER (ch);
2117 if (NILP (restriction))
2118 charset = CHAR_CHARSET (XINT (ch));
2119 else
2120 {
2121 Lisp_Object charset_list;
2122
2123 if (CONSP (restriction))
2124 {
2125 for (charset_list = Qnil; CONSP (restriction);
2126 restriction = XCDR (restriction))
2127 {
2128 int id;
2129
2130 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2131 charset_list = Fcons (make_number (id), charset_list);
2132 }
2133 charset_list = Fnreverse (charset_list);
2134 }
2135 else
2136 charset_list = coding_system_charset_list (restriction);
2137 charset = char_charset (XINT (ch), charset_list, NULL);
2138 if (! charset)
2139 return Qnil;
2140 }
2141 return (CHARSET_NAME (charset));
2142 }
2143
2144
2145 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2146 doc: /*
2147 Return charset of a character in the current buffer at position POS.
2148 If POS is nil, it defauls to the current point.
2149 If POS is out of range, the value is nil. */)
2150 (pos)
2151 Lisp_Object pos;
2152 {
2153 Lisp_Object ch;
2154 struct charset *charset;
2155
2156 ch = Fchar_after (pos);
2157 if (! INTEGERP (ch))
2158 return ch;
2159 charset = CHAR_CHARSET (XINT (ch));
2160 return (CHARSET_NAME (charset));
2161 }
2162
2163
2164 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2165 doc: /*
2166 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2167
2168 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2169 by their DIMENSION, CHARS, and FINAL-CHAR,
2170 whereas Emacs distinguishes them by charset symbol.
2171 See the documentation of the function `charset-info' for the meanings of
2172 DIMENSION, CHARS, and FINAL-CHAR. */)
2173 (dimension, chars, final_char)
2174 Lisp_Object dimension, chars, final_char;
2175 {
2176 int id;
2177 int chars_flag;
2178
2179 check_iso_charset_parameter (dimension, chars, final_char);
2180 chars_flag = XFASTINT (chars) == 96;
2181 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2182 XFASTINT (final_char));
2183 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2184 }
2185
2186
2187 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2188 0, 0, 0,
2189 doc: /*
2190 Internal use only.
2191 Clear temporary charset mapping tables.
2192 It should be called only from temacs invoked for dumping. */)
2193 ()
2194 {
2195 int i;
2196 struct charset *charset;
2197 Lisp_Object attrs;
2198
2199 if (temp_charset_work)
2200 {
2201 free (temp_charset_work);
2202 temp_charset_work = NULL;
2203 }
2204
2205 if (CHAR_TABLE_P (Vchar_unify_table))
2206 Foptimize_char_table (Vchar_unify_table, Qnil);
2207
2208 return Qnil;
2209 }
2210
2211 DEFUN ("charset-priority-list", Fcharset_priority_list,
2212 Scharset_priority_list, 0, 1, 0,
2213 doc: /* Return the list of charsets ordered by priority.
2214 HIGHESTP non-nil means just return the highest priority one. */)
2215 (highestp)
2216 Lisp_Object highestp;
2217 {
2218 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2219
2220 if (!NILP (highestp))
2221 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2222
2223 while (!NILP (list))
2224 {
2225 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2226 list = XCDR (list);
2227 }
2228 return Fnreverse (val);
2229 }
2230
2231 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2232 1, MANY, 0,
2233 doc: /* Assign higher priority to the charsets given as arguments.
2234 usage: (set-charset-priority &rest charsets) */)
2235 (nargs, args)
2236 int nargs;
2237 Lisp_Object *args;
2238 {
2239 Lisp_Object new_head, old_list, arglist[2];
2240 Lisp_Object list_2022, list_emacs_mule;
2241 int i, id;
2242
2243 old_list = Fcopy_sequence (Vcharset_ordered_list);
2244 new_head = Qnil;
2245 for (i = 0; i < nargs; i++)
2246 {
2247 CHECK_CHARSET_GET_ID (args[i], id);
2248 if (! NILP (Fmemq (make_number (id), old_list)))
2249 {
2250 old_list = Fdelq (make_number (id), old_list);
2251 new_head = Fcons (make_number (id), new_head);
2252 }
2253 }
2254 arglist[0] = Fnreverse (new_head);
2255 arglist[1] = Vcharset_non_preferred_head = old_list;
2256 Vcharset_ordered_list = Fnconc (2, arglist);
2257 charset_ordered_list_tick++;
2258
2259 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2260 CONSP (old_list); old_list = XCDR (old_list))
2261 {
2262 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2263 list_2022 = Fcons (XCAR (old_list), list_2022);
2264 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2265 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2266 }
2267 Viso_2022_charset_list = Fnreverse (list_2022);
2268 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2269
2270 return Qnil;
2271 }
2272
2273 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2274 0, 1, 0,
2275 doc: /* Internal use only.
2276 Return charset identification number of CHARSET. */)
2277 (charset)
2278 Lisp_Object charset;
2279 {
2280 int id;
2281
2282 CHECK_CHARSET_GET_ID (charset, id);
2283 return make_number (id);
2284 }
2285
2286 \f
2287 void
2288 init_charset ()
2289 {
2290 Lisp_Object tempdir;
2291 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2292 if (access (SDATA (tempdir), 0) < 0)
2293 {
2294 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2295 Emacs will not function correctly without the character map files.\n\
2296 Please check your installation!\n",
2297 tempdir);
2298 /* TODO should this be a fatal error? (Bug#909) */
2299 }
2300
2301 Vcharset_map_path = Fcons (tempdir, Qnil);
2302 }
2303
2304
2305 void
2306 init_charset_once ()
2307 {
2308 int i, j, k;
2309
2310 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2311 for (j = 0; j < ISO_MAX_CHARS; j++)
2312 for (k = 0; k < ISO_MAX_FINAL; k++)
2313 iso_charset_table[i][j][k] = -1;
2314
2315 for (i = 0; i < 256; i++)
2316 emacs_mule_charset[i] = NULL;
2317
2318 charset_jisx0201_roman = -1;
2319 charset_jisx0208_1978 = -1;
2320 charset_jisx0208 = -1;
2321
2322 for (i = 0; i < 128; i++)
2323 unibyte_to_multibyte_table[i] = i;
2324 for (; i < 256; i++)
2325 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2326 }
2327
2328 #ifdef emacs
2329
2330 void
2331 syms_of_charset ()
2332 {
2333 DEFSYM (Qcharsetp, "charsetp");
2334
2335 DEFSYM (Qascii, "ascii");
2336 DEFSYM (Qunicode, "unicode");
2337 DEFSYM (Qemacs, "emacs");
2338 DEFSYM (Qeight_bit, "eight-bit");
2339 DEFSYM (Qiso_8859_1, "iso-8859-1");
2340
2341 DEFSYM (Qgl, "gl");
2342 DEFSYM (Qgr, "gr");
2343
2344 staticpro (&Vcharset_ordered_list);
2345 Vcharset_ordered_list = Qnil;
2346
2347 staticpro (&Viso_2022_charset_list);
2348 Viso_2022_charset_list = Qnil;
2349
2350 staticpro (&Vemacs_mule_charset_list);
2351 Vemacs_mule_charset_list = Qnil;
2352
2353 /* Don't staticpro them here. It's done in syms_of_fns. */
2354 QCtest = intern (":test");
2355 Qeq = intern ("eq");
2356
2357 staticpro (&Vcharset_hash_table);
2358 {
2359 Lisp_Object args[2];
2360 args[0] = QCtest;
2361 args[1] = Qeq;
2362 Vcharset_hash_table = Fmake_hash_table (2, args);
2363 }
2364
2365 charset_table_size = 128;
2366 charset_table = ((struct charset *)
2367 xmalloc (sizeof (struct charset) * charset_table_size));
2368 charset_table_used = 0;
2369
2370 defsubr (&Scharsetp);
2371 defsubr (&Smap_charset_chars);
2372 defsubr (&Sdefine_charset_internal);
2373 defsubr (&Sdefine_charset_alias);
2374 defsubr (&Scharset_plist);
2375 defsubr (&Sset_charset_plist);
2376 defsubr (&Sunify_charset);
2377 defsubr (&Sget_unused_iso_final_char);
2378 defsubr (&Sdeclare_equiv_charset);
2379 defsubr (&Sfind_charset_region);
2380 defsubr (&Sfind_charset_string);
2381 defsubr (&Sdecode_char);
2382 defsubr (&Sencode_char);
2383 defsubr (&Ssplit_char);
2384 defsubr (&Smake_char);
2385 defsubr (&Schar_charset);
2386 defsubr (&Scharset_after);
2387 defsubr (&Siso_charset);
2388 defsubr (&Sclear_charset_maps);
2389 defsubr (&Scharset_priority_list);
2390 defsubr (&Sset_charset_priority);
2391 defsubr (&Scharset_id_internal);
2392
2393 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2394 doc: /* *List of directories to search for charset map files. */);
2395 Vcharset_map_path = Qnil;
2396
2397 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2398 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2399 inhibit_load_charset_map = 0;
2400
2401 DEFVAR_LISP ("charset-list", &Vcharset_list,
2402 doc: /* List of all charsets ever defined. */);
2403 Vcharset_list = Qnil;
2404
2405 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2406 doc: /* ISO639 language mnemonic symbol for the current language environment.
2407 If the current language environment is for multiple languages (e.g. "Latin-1"),
2408 the value may be a list of mnemonics. */);
2409 Vcurrent_iso639_language = Qnil;
2410
2411 charset_ascii
2412 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2413 0, 127, 'B', -1, 0, 1, 0, 0);
2414 charset_iso_8859_1
2415 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2416 0, 255, -1, -1, -1, 1, 0, 0);
2417 charset_unicode
2418 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2419 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2420 charset_emacs
2421 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2422 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2423 charset_eight_bit
2424 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2425 128, 255, -1, 0, -1, 0, 1,
2426 MAX_5_BYTE_CHAR + 1);
2427 }
2428
2429 #endif /* emacs */
2430
2431 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2432 (do not change this comment) */