(load_charset_map_from_file): When a mapfile can't be loaded, signal
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
78
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
85
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
103
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
108
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
111
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
114
115 struct charset *emacs_mule_charset[256];
116
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121 Lisp_Object Vcharset_map_path;
122
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map;
125
126 Lisp_Object Vcurrent_iso639_language;
127
128 /* Defined in chartab.c */
129 extern void
130 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
151
152
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
169
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
172
173 static struct
174 {
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203 } *temp_charset_work;
204
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
227 \f
228
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded;
232
233 struct charset_map_entries
234 {
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240 };
241
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
251
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
257
258 If CONTROL_FLAG is 2, setup the following tables.
259
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265 */
266
267 static void
268 load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
273 {
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
280
281 if (n_entries <= 0)
282 return;
283
284 if (control_flag)
285 {
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
293
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
333 charset_map_loaded = 1;
334 }
335
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
339 {
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
344
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
352 {
353 to_index = from_index;
354 to_c = from_c;
355 }
356 else
357 {
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
360 }
361 if (from_index < 0 || to_index < 0)
362 continue;
363
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
368
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
418
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
421 }
422 }
423
424 if (control_flag == 0)
425 {
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
429 }
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
435 }
436
437
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
440
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
445 {
446 int c;
447 unsigned n;
448
449 while ((c = getc (fp)) != EOF)
450 {
451 if (c == '#')
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
460 }
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
478 }
479
480 extern Lisp_Object Qfile_name_handler_alist;
481
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
484 0xAAAA 0xCCCC
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
487 0xAAAA-0xBBBB 0xCCCC
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
490
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
494 range.
495
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
498
499 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
500
501 static void
502 load_charset_map_from_file (charset, mapfile, control_flag)
503 struct charset *charset;
504 Lisp_Object mapfile;
505 int control_flag;
506 {
507 unsigned min_code = CHARSET_MIN_CODE (charset);
508 unsigned max_code = CHARSET_MAX_CODE (charset);
509 int fd;
510 FILE *fp;
511 int eof;
512 Lisp_Object suffixes;
513 struct charset_map_entries *head, *entries;
514 int n_entries;
515 int count = SPECPDL_INDEX ();
516
517 suffixes = Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil));
519
520 specbind (Qfile_name_handler_alist, Qnil);
521 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
522 unbind_to (count, Qnil);
523 if (fd < 0
524 || ! (fp = fdopen (fd, "r")))
525 error ("Failure in loading charset map: %S", SDATA (mapfile));
526
527 head = entries = ((struct charset_map_entries *)
528 alloca (sizeof (struct charset_map_entries)));
529 n_entries = 0;
530 eof = 0;
531 while (1)
532 {
533 unsigned from, to;
534 int c;
535 int idx;
536
537 from = read_hex (fp, &eof);
538 if (eof)
539 break;
540 if (getc (fp) == '-')
541 to = read_hex (fp, &eof);
542 else
543 to = from;
544 c = (int) read_hex (fp, &eof);
545
546 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
547 continue;
548
549 if (n_entries > 0 && (n_entries % 0x10000) == 0)
550 {
551 entries->next = ((struct charset_map_entries *)
552 alloca (sizeof (struct charset_map_entries)));
553 entries = entries->next;
554 }
555 idx = n_entries % 0x10000;
556 entries->entry[idx].from = from;
557 entries->entry[idx].to = to;
558 entries->entry[idx].c = c;
559 n_entries++;
560 }
561 fclose (fp);
562 close (fd);
563
564 load_charset_map (charset, head, n_entries, control_flag);
565 }
566
567 static void
568 load_charset_map_from_vector (charset, vec, control_flag)
569 struct charset *charset;
570 Lisp_Object vec;
571 int control_flag;
572 {
573 unsigned min_code = CHARSET_MIN_CODE (charset);
574 unsigned max_code = CHARSET_MAX_CODE (charset);
575 struct charset_map_entries *head, *entries;
576 int n_entries;
577 int len = ASIZE (vec);
578 int i;
579
580 if (len % 2 == 1)
581 {
582 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
583 return;
584 }
585
586 head = entries = ((struct charset_map_entries *)
587 alloca (sizeof (struct charset_map_entries)));
588 n_entries = 0;
589 for (i = 0; i < len; i += 2)
590 {
591 Lisp_Object val, val2;
592 unsigned from, to;
593 int c;
594 int idx;
595
596 val = AREF (vec, i);
597 if (CONSP (val))
598 {
599 val2 = XCDR (val);
600 val = XCAR (val);
601 CHECK_NATNUM (val);
602 CHECK_NATNUM (val2);
603 from = XFASTINT (val);
604 to = XFASTINT (val2);
605 }
606 else
607 {
608 CHECK_NATNUM (val);
609 from = to = XFASTINT (val);
610 }
611 val = AREF (vec, i + 1);
612 CHECK_NATNUM (val);
613 c = XFASTINT (val);
614
615 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
616 continue;
617
618 if (n_entries > 0 && (n_entries % 0x10000) == 0)
619 {
620 entries->next = ((struct charset_map_entries *)
621 alloca (sizeof (struct charset_map_entries)));
622 entries = entries->next;
623 }
624 idx = n_entries % 0x10000;
625 entries->entry[idx].from = from;
626 entries->entry[idx].to = to;
627 entries->entry[idx].c = c;
628 n_entries++;
629 }
630
631 load_charset_map (charset, head, n_entries, control_flag);
632 }
633
634
635 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
637
638 static void
639 load_charset (charset, control_flag)
640 struct charset *charset;
641 int control_flag;
642 {
643 Lisp_Object map;
644
645 if (inhibit_load_charset_map
646 && temp_charset_work
647 && charset == temp_charset_work->current
648 && (control_flag == 2 == temp_charset_work->for_encoder))
649 return;
650
651 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
652 map = CHARSET_MAP (charset);
653 else if (CHARSET_UNIFIED_P (charset))
654 map = CHARSET_UNIFY_MAP (charset);
655 if (STRINGP (map))
656 load_charset_map_from_file (charset, map, control_flag);
657 else
658 load_charset_map_from_vector (charset, map, control_flag);
659 }
660
661
662 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
663 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
664 (object)
665 Lisp_Object object;
666 {
667 return (CHARSETP (object) ? Qt : Qnil);
668 }
669
670
671 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
672 Lisp_Object function, Lisp_Object arg,
673 unsigned from, unsigned to));
674
675 void
676 map_charset_for_dump (c_function, function, arg, from, to)
677 void (*c_function) (Lisp_Object, Lisp_Object);
678 Lisp_Object function, arg;
679 unsigned from, to;
680 {
681 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
682 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
683 Lisp_Object range;
684 int c, stop;
685 struct gcpro gcpro1;
686
687 range = Fcons (Qnil, Qnil);
688 GCPRO1 (range);
689
690 c = temp_charset_work->min_char;
691 stop = (temp_charset_work->max_char < 0x20000
692 ? temp_charset_work->max_char : 0xFFFF);
693
694 while (1)
695 {
696 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
697
698 if (index >= from_idx && index <= to_idx)
699 {
700 if (NILP (XCAR (range)))
701 XSETCAR (range, make_number (c));
702 }
703 else if (! NILP (XCAR (range)))
704 {
705 XSETCDR (range, make_number (c - 1));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 XSETCAR (range, Qnil);
711 }
712 if (c == stop)
713 {
714 if (c == temp_charset_work->max_char)
715 {
716 if (! NILP (XCAR (range)))
717 {
718 XSETCDR (range, make_number (c));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
723 }
724 break;
725 }
726 c = 0x1FFFF;
727 stop = temp_charset_work->max_char;
728 }
729 c++;
730 }
731 }
732
733 void
734 map_charset_chars (c_function, function, arg,
735 charset, from, to)
736 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
737 Lisp_Object function, arg;
738 struct charset *charset;
739 unsigned from, to;
740 {
741 Lisp_Object range;
742 int partial;
743
744 partial = (from > CHARSET_MIN_CODE (charset)
745 || to < CHARSET_MAX_CODE (charset));
746
747 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
748 {
749 int from_idx = CODE_POINT_TO_INDEX (charset, from);
750 int to_idx = CODE_POINT_TO_INDEX (charset, to);
751 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
752 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
753
754 if (CHARSET_UNIFIED_P (charset))
755 {
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
757 load_charset (charset, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
759 map_char_table_for_charset (c_function, function,
760 CHARSET_DEUNIFIER (charset), arg,
761 partial ? charset : NULL, from, to);
762 else
763 map_charset_for_dump (c_function, function, arg, from, to);
764 }
765
766 range = Fcons (make_number (from_c), make_number (to_c));
767 if (NILP (function))
768 (*c_function) (arg, range);
769 else
770 call2 (function, range, arg);
771 }
772 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
773 {
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
775 load_charset (charset, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
777 map_char_table_for_charset (c_function, function,
778 CHARSET_ENCODER (charset), arg,
779 partial ? charset : NULL, from, to);
780 else
781 map_charset_for_dump (c_function, function, arg, from, to);
782 }
783 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
784 {
785 Lisp_Object subset_info;
786 int offset;
787
788 subset_info = CHARSET_SUBSET (charset);
789 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
790 offset = XINT (AREF (subset_info, 3));
791 from -= offset;
792 if (from < XFASTINT (AREF (subset_info, 1)))
793 from = XFASTINT (AREF (subset_info, 1));
794 to -= offset;
795 if (to > XFASTINT (AREF (subset_info, 2)))
796 to = XFASTINT (AREF (subset_info, 2));
797 map_charset_chars (c_function, function, arg, charset, from, to);
798 }
799 else /* i.e. CHARSET_METHOD_SUPERSET */
800 {
801 Lisp_Object parents;
802
803 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
804 parents = XCDR (parents))
805 {
806 int offset;
807 unsigned this_from, this_to;
808
809 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
810 offset = XINT (XCDR (XCAR (parents)));
811 this_from = from - offset;
812 this_to = to - offset;
813 if (this_from < CHARSET_MIN_CODE (charset))
814 this_from = CHARSET_MIN_CODE (charset);
815 if (this_to > CHARSET_MAX_CODE (charset))
816 this_to = CHARSET_MAX_CODE (charset);
817 map_charset_chars (c_function, function, arg, charset,
818 this_from, this_to);
819 }
820 }
821 }
822
823 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
824 doc: /* Call FUNCTION for all characters in CHARSET.
825 FUNCTION is called with an argument RANGE and the optional 3rd
826 argument ARG.
827
828 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829 characters contained in CHARSET.
830
831 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
832 range of code points of target characters. */)
833 (function, charset, arg, from_code, to_code)
834 Lisp_Object function, charset, arg, from_code, to_code;
835 {
836 struct charset *cs;
837 unsigned from, to;
838
839 CHECK_CHARSET_GET_CHARSET (charset, cs);
840 if (NILP (from_code))
841 from = CHARSET_MIN_CODE (cs);
842 else
843 {
844 CHECK_NATNUM (from_code);
845 from = XINT (from_code);
846 if (from < CHARSET_MIN_CODE (cs))
847 from = CHARSET_MIN_CODE (cs);
848 }
849 if (NILP (to_code))
850 to = CHARSET_MAX_CODE (cs);
851 else
852 {
853 CHECK_NATNUM (to_code);
854 to = XINT (to_code);
855 if (to > CHARSET_MAX_CODE (cs))
856 to = CHARSET_MAX_CODE (cs);
857 }
858 map_charset_chars (NULL, function, arg, cs, from, to);
859 return Qnil;
860 }
861
862
863 /* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
866 detail. */
867
868 DEFUN ("define-charset-internal", Fdefine_charset_internal,
869 Sdefine_charset_internal, charset_arg_max, MANY, 0,
870 doc: /* For internal use only.
871 usage: (define-charset-internal ...) */)
872 (nargs, args)
873 int nargs;
874 Lisp_Object *args;
875 {
876 /* Charset attr vector. */
877 Lisp_Object attrs;
878 Lisp_Object val;
879 unsigned hash_code;
880 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
881 int i, j;
882 struct charset charset;
883 int id;
884 int dimension;
885 int new_definition_p;
886 int nchars;
887
888 if (nargs != charset_arg_max)
889 return Fsignal (Qwrong_number_of_arguments,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs)));
892
893 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
894
895 CHECK_SYMBOL (args[charset_arg_name]);
896 ASET (attrs, charset_name, args[charset_arg_name]);
897
898 val = args[charset_arg_code_space];
899 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
900 {
901 int min_byte, max_byte;
902
903 min_byte = XINT (Faref (val, make_number (i * 2)));
904 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
905 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
906 error ("Invalid :code-space value");
907 charset.code_space[i * 4] = min_byte;
908 charset.code_space[i * 4 + 1] = max_byte;
909 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
910 nchars *= charset.code_space[i * 4 + 2];
911 charset.code_space[i * 4 + 3] = nchars;
912 if (max_byte > 0)
913 dimension = i + 1;
914 }
915
916 val = args[charset_arg_dimension];
917 if (NILP (val))
918 charset.dimension = dimension;
919 else
920 {
921 CHECK_NATNUM (val);
922 charset.dimension = XINT (val);
923 if (charset.dimension < 1 || charset.dimension > 4)
924 args_out_of_range_3 (val, make_number (1), make_number (4));
925 }
926
927 charset.code_linear_p
928 = (charset.dimension == 1
929 || (charset.code_space[2] == 256
930 && (charset.dimension == 2
931 || (charset.code_space[6] == 256
932 && (charset.dimension == 3
933 || charset.code_space[10] == 256)))));
934
935 if (! charset.code_linear_p)
936 {
937 charset.code_space_mask = (unsigned char *) xmalloc (256);
938 bzero (charset.code_space_mask, 256);
939 for (i = 0; i < 4; i++)
940 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
941 j++)
942 charset.code_space_mask[j] |= (1 << i);
943 }
944
945 charset.iso_chars_96 = charset.code_space[2] == 96;
946
947 charset.min_code = (charset.code_space[0]
948 | (charset.code_space[4] << 8)
949 | (charset.code_space[8] << 16)
950 | (charset.code_space[12] << 24));
951 charset.max_code = (charset.code_space[1]
952 | (charset.code_space[5] << 8)
953 | (charset.code_space[9] << 16)
954 | (charset.code_space[13] << 24));
955 charset.char_index_offset = 0;
956
957 val = args[charset_arg_min_code];
958 if (! NILP (val))
959 {
960 unsigned code;
961
962 if (INTEGERP (val))
963 code = XINT (val);
964 else
965 {
966 CHECK_CONS (val);
967 CHECK_NUMBER_CAR (val);
968 CHECK_NUMBER_CDR (val);
969 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
970 }
971 if (code < charset.min_code
972 || code > charset.max_code)
973 args_out_of_range_3 (make_number (charset.min_code),
974 make_number (charset.max_code), val);
975 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
976 charset.min_code = code;
977 }
978
979 val = args[charset_arg_max_code];
980 if (! NILP (val))
981 {
982 unsigned code;
983
984 if (INTEGERP (val))
985 code = XINT (val);
986 else
987 {
988 CHECK_CONS (val);
989 CHECK_NUMBER_CAR (val);
990 CHECK_NUMBER_CDR (val);
991 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
992 }
993 if (code < charset.min_code
994 || code > charset.max_code)
995 args_out_of_range_3 (make_number (charset.min_code),
996 make_number (charset.max_code), val);
997 charset.max_code = code;
998 }
999
1000 charset.compact_codes_p = charset.max_code < 0x10000;
1001
1002 val = args[charset_arg_invalid_code];
1003 if (NILP (val))
1004 {
1005 if (charset.min_code > 0)
1006 charset.invalid_code = 0;
1007 else
1008 {
1009 XSETINT (val, charset.max_code + 1);
1010 if (XINT (val) == charset.max_code + 1)
1011 charset.invalid_code = charset.max_code + 1;
1012 else
1013 error ("Attribute :invalid-code must be specified");
1014 }
1015 }
1016 else
1017 {
1018 CHECK_NATNUM (val);
1019 charset.invalid_code = XFASTINT (val);
1020 }
1021
1022 val = args[charset_arg_iso_final];
1023 if (NILP (val))
1024 charset.iso_final = -1;
1025 else
1026 {
1027 CHECK_NUMBER (val);
1028 if (XINT (val) < '0' || XINT (val) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val));
1030 charset.iso_final = XINT (val);
1031 }
1032
1033 val = args[charset_arg_iso_revision];
1034 if (NILP (val))
1035 charset.iso_revision = -1;
1036 else
1037 {
1038 CHECK_NUMBER (val);
1039 if (XINT (val) > 63)
1040 args_out_of_range (make_number (63), val);
1041 charset.iso_revision = XINT (val);
1042 }
1043
1044 val = args[charset_arg_emacs_mule_id];
1045 if (NILP (val))
1046 charset.emacs_mule_id = -1;
1047 else
1048 {
1049 CHECK_NATNUM (val);
1050 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val));
1052 charset.emacs_mule_id = XINT (val);
1053 }
1054
1055 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1056
1057 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1058
1059 charset.unified_p = 0;
1060
1061 bzero (charset.fast_map, sizeof (charset.fast_map));
1062
1063 if (! NILP (args[charset_arg_code_offset]))
1064 {
1065 val = args[charset_arg_code_offset];
1066 CHECK_NUMBER (val);
1067
1068 charset.method = CHARSET_METHOD_OFFSET;
1069 charset.code_offset = XINT (val);
1070
1071 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1072 charset.min_char = i + charset.code_offset;
1073 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1074 charset.max_char = i + charset.code_offset;
1075 if (charset.max_char > MAX_CHAR)
1076 error ("Unsupported max char: %d", charset.max_char);
1077
1078 i = (charset.min_char >> 7) << 7;
1079 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1080 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1081 i = (i >> 12) << 12;
1082 for (; i <= charset.max_char; i += 0x1000)
1083 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1084 }
1085 else if (! NILP (args[charset_arg_map]))
1086 {
1087 val = args[charset_arg_map];
1088 ASET (attrs, charset_map, val);
1089 charset.method = CHARSET_METHOD_MAP;
1090 }
1091 else if (! NILP (args[charset_arg_subset]))
1092 {
1093 Lisp_Object parent;
1094 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1095 struct charset *parent_charset;
1096
1097 val = args[charset_arg_subset];
1098 parent = Fcar (val);
1099 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1100 parent_min_code = Fnth (make_number (1), val);
1101 CHECK_NATNUM (parent_min_code);
1102 parent_max_code = Fnth (make_number (2), val);
1103 CHECK_NATNUM (parent_max_code);
1104 parent_code_offset = Fnth (make_number (3), val);
1105 CHECK_NUMBER (parent_code_offset);
1106 val = Fmake_vector (make_number (4), Qnil);
1107 ASET (val, 0, make_number (parent_charset->id));
1108 ASET (val, 1, parent_min_code);
1109 ASET (val, 2, parent_max_code);
1110 ASET (val, 3, parent_code_offset);
1111 ASET (attrs, charset_subset, val);
1112
1113 charset.method = CHARSET_METHOD_SUBSET;
1114 /* Here, we just copy the parent's fast_map. It's not accurate,
1115 but at least it works for quickly detecting which character
1116 DOESN'T belong to this charset. */
1117 for (i = 0; i < 190; i++)
1118 charset.fast_map[i] = parent_charset->fast_map[i];
1119
1120 /* We also copy these for parents. */
1121 charset.min_char = parent_charset->min_char;
1122 charset.max_char = parent_charset->max_char;
1123 }
1124 else if (! NILP (args[charset_arg_superset]))
1125 {
1126 val = args[charset_arg_superset];
1127 charset.method = CHARSET_METHOD_SUPERSET;
1128 val = Fcopy_sequence (val);
1129 ASET (attrs, charset_superset, val);
1130
1131 charset.min_char = MAX_CHAR;
1132 charset.max_char = 0;
1133 for (; ! NILP (val); val = Fcdr (val))
1134 {
1135 Lisp_Object elt, car_part, cdr_part;
1136 int this_id, offset;
1137 struct charset *this_charset;
1138
1139 elt = Fcar (val);
1140 if (CONSP (elt))
1141 {
1142 car_part = XCAR (elt);
1143 cdr_part = XCDR (elt);
1144 CHECK_CHARSET_GET_ID (car_part, this_id);
1145 CHECK_NUMBER (cdr_part);
1146 offset = XINT (cdr_part);
1147 }
1148 else
1149 {
1150 CHECK_CHARSET_GET_ID (elt, this_id);
1151 offset = 0;
1152 }
1153 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1154
1155 this_charset = CHARSET_FROM_ID (this_id);
1156 if (charset.min_char > this_charset->min_char)
1157 charset.min_char = this_charset->min_char;
1158 if (charset.max_char < this_charset->max_char)
1159 charset.max_char = this_charset->max_char;
1160 for (i = 0; i < 190; i++)
1161 charset.fast_map[i] |= this_charset->fast_map[i];
1162 }
1163 }
1164 else
1165 error ("None of :code-offset, :map, :parents are specified");
1166
1167 val = args[charset_arg_unify_map];
1168 if (! NILP (val) && !STRINGP (val))
1169 CHECK_VECTOR (val);
1170 ASET (attrs, charset_unify_map, val);
1171
1172 CHECK_LIST (args[charset_arg_plist]);
1173 ASET (attrs, charset_plist, args[charset_arg_plist]);
1174
1175 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1176 &hash_code);
1177 if (charset.hash_index >= 0)
1178 {
1179 new_definition_p = 0;
1180 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1181 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1182 }
1183 else
1184 {
1185 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1186 hash_code);
1187 if (charset_table_used == charset_table_size)
1188 {
1189 struct charset *new_table
1190 = (struct charset *) xmalloc (sizeof (struct charset)
1191 * (charset_table_size + 16));
1192 bcopy (charset_table, new_table,
1193 sizeof (struct charset) * charset_table_size);
1194 charset_table_size += 16;
1195 charset_table = new_table;
1196 }
1197 id = charset_table_used++;
1198 new_definition_p = 1;
1199 }
1200
1201 ASET (attrs, charset_id, make_number (id));
1202 charset.id = id;
1203 charset_table[id] = charset;
1204
1205 if (charset.method == CHARSET_METHOD_MAP)
1206 {
1207 load_charset (&charset, 0);
1208 charset_table[id] = charset;
1209 }
1210
1211 if (charset.iso_final >= 0)
1212 {
1213 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1214 charset.iso_final) = id;
1215 if (new_definition_p)
1216 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1217 Fcons (make_number (id), Qnil));
1218 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1219 charset_jisx0201_roman = id;
1220 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1221 charset_jisx0208_1978 = id;
1222 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1223 charset_jisx0208 = id;
1224 }
1225
1226 if (charset.emacs_mule_id >= 0)
1227 {
1228 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1229 if (charset.emacs_mule_id < 0xA0)
1230 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1231 else
1232 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1233 if (new_definition_p)
1234 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1235 Fcons (make_number (id), Qnil));
1236 }
1237
1238 if (new_definition_p)
1239 {
1240 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1241 if (charset.supplementary_p)
1242 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1243 Fcons (make_number (id), Qnil));
1244 else
1245 {
1246 Lisp_Object tail;
1247
1248 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1249 {
1250 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1251
1252 if (cs->supplementary_p)
1253 break;
1254 }
1255 if (EQ (tail, Vcharset_ordered_list))
1256 Vcharset_ordered_list = Fcons (make_number (id),
1257 Vcharset_ordered_list);
1258 else if (NILP (tail))
1259 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1260 Fcons (make_number (id), Qnil));
1261 else
1262 {
1263 val = Fcons (XCAR (tail), XCDR (tail));
1264 XSETCDR (tail, val);
1265 XSETCAR (tail, make_number (id));
1266 }
1267 }
1268 charset_ordered_list_tick++;
1269 }
1270
1271 return Qnil;
1272 }
1273
1274
1275 /* Same as Fdefine_charset_internal but arguments are more convenient
1276 to call from C (typically in syms_of_charset). This can define a
1277 charset of `offset' method only. Return the ID of the new
1278 charset. */
1279
1280 static int
1281 define_charset_internal (name, dimension, code_space, min_code, max_code,
1282 iso_final, iso_revision, emacs_mule_id,
1283 ascii_compatible, supplementary,
1284 code_offset)
1285 Lisp_Object name;
1286 int dimension;
1287 unsigned char *code_space;
1288 unsigned min_code, max_code;
1289 int iso_final, iso_revision, emacs_mule_id;
1290 int ascii_compatible, supplementary;
1291 int code_offset;
1292 {
1293 Lisp_Object args[charset_arg_max];
1294 Lisp_Object plist[14];
1295 Lisp_Object val;
1296 int i;
1297
1298 args[charset_arg_name] = name;
1299 args[charset_arg_dimension] = make_number (dimension);
1300 val = Fmake_vector (make_number (8), make_number (0));
1301 for (i = 0; i < 8; i++)
1302 ASET (val, i, make_number (code_space[i]));
1303 args[charset_arg_code_space] = val;
1304 args[charset_arg_min_code] = make_number (min_code);
1305 args[charset_arg_max_code] = make_number (max_code);
1306 args[charset_arg_iso_final]
1307 = (iso_final < 0 ? Qnil : make_number (iso_final));
1308 args[charset_arg_iso_revision] = make_number (iso_revision);
1309 args[charset_arg_emacs_mule_id]
1310 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1311 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1312 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1313 args[charset_arg_invalid_code] = Qnil;
1314 args[charset_arg_code_offset] = make_number (code_offset);
1315 args[charset_arg_map] = Qnil;
1316 args[charset_arg_subset] = Qnil;
1317 args[charset_arg_superset] = Qnil;
1318 args[charset_arg_unify_map] = Qnil;
1319
1320 plist[0] = intern (":name");
1321 plist[1] = args[charset_arg_name];
1322 plist[2] = intern (":dimension");
1323 plist[3] = args[charset_arg_dimension];
1324 plist[4] = intern (":code-space");
1325 plist[5] = args[charset_arg_code_space];
1326 plist[6] = intern (":iso-final-char");
1327 plist[7] = args[charset_arg_iso_final];
1328 plist[8] = intern (":emacs-mule-id");
1329 plist[9] = args[charset_arg_emacs_mule_id];
1330 plist[10] = intern (":ascii-compatible-p");
1331 plist[11] = args[charset_arg_ascii_compatible_p];
1332 plist[12] = intern (":code-offset");
1333 plist[13] = args[charset_arg_code_offset];
1334
1335 args[charset_arg_plist] = Flist (14, plist);
1336 Fdefine_charset_internal (charset_arg_max, args);
1337
1338 return XINT (CHARSET_SYMBOL_ID (name));
1339 }
1340
1341
1342 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1343 Sdefine_charset_alias, 2, 2, 0,
1344 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1345 (alias, charset)
1346 Lisp_Object alias, charset;
1347 {
1348 Lisp_Object attr;
1349
1350 CHECK_CHARSET_GET_ATTR (charset, attr);
1351 Fputhash (alias, attr, Vcharset_hash_table);
1352 Vcharset_list = Fcons (alias, Vcharset_list);
1353 return Qnil;
1354 }
1355
1356
1357 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1358 doc: /* Return the property list of CHARSET. */)
1359 (charset)
1360 Lisp_Object charset;
1361 {
1362 Lisp_Object attrs;
1363
1364 CHECK_CHARSET_GET_ATTR (charset, attrs);
1365 return CHARSET_ATTR_PLIST (attrs);
1366 }
1367
1368
1369 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1370 doc: /* Set CHARSET's property list to PLIST. */)
1371 (charset, plist)
1372 Lisp_Object charset, plist;
1373 {
1374 Lisp_Object attrs;
1375
1376 CHECK_CHARSET_GET_ATTR (charset, attrs);
1377 CHARSET_ATTR_PLIST (attrs) = plist;
1378 return plist;
1379 }
1380
1381
1382 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1383 doc: /* Unify characters of CHARSET with Unicode.
1384 This means reading the relevant file and installing the table defined
1385 by CHARSET's `:unify-map' property.
1386
1387 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1388 the same meaning as the `:unify-map' attribute in the function
1389 `define-charset' (which see).
1390
1391 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1392 (charset, unify_map, deunify)
1393 Lisp_Object charset, unify_map, deunify;
1394 {
1395 int id;
1396 struct charset *cs;
1397
1398 CHECK_CHARSET_GET_ID (charset, id);
1399 cs = CHARSET_FROM_ID (id);
1400 if (NILP (deunify)
1401 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1402 : ! CHARSET_UNIFIED_P (cs))
1403 return Qnil;
1404
1405 CHARSET_UNIFIED_P (cs) = 0;
1406 if (NILP (deunify))
1407 {
1408 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1409 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1410 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1411 if (NILP (unify_map))
1412 unify_map = CHARSET_UNIFY_MAP (cs);
1413 else
1414 {
1415 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1416 signal_error ("Bad unify-map", unify_map);
1417 CHARSET_UNIFY_MAP (cs) = unify_map;
1418 }
1419 if (NILP (Vchar_unify_table))
1420 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1421 char_table_set_range (Vchar_unify_table,
1422 cs->min_char, cs->max_char, charset);
1423 CHARSET_UNIFIED_P (cs) = 1;
1424 }
1425 else if (CHAR_TABLE_P (Vchar_unify_table))
1426 {
1427 int min_code = CHARSET_MIN_CODE (cs);
1428 int max_code = CHARSET_MAX_CODE (cs);
1429 int min_char = DECODE_CHAR (cs, min_code);
1430 int max_char = DECODE_CHAR (cs, max_code);
1431
1432 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1433 }
1434
1435 return Qnil;
1436 }
1437
1438 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1439 Sget_unused_iso_final_char, 2, 2, 0,
1440 doc: /*
1441 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1442 DIMENSION is the number of bytes to represent a character: 1 or 2.
1443 CHARS is the number of characters in a dimension: 94 or 96.
1444
1445 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1446 If there's no unused final char for the specified kind of charset,
1447 return nil. */)
1448 (dimension, chars)
1449 Lisp_Object dimension, chars;
1450 {
1451 int final_char;
1452
1453 CHECK_NUMBER (dimension);
1454 CHECK_NUMBER (chars);
1455 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1456 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1457 if (XINT (chars) != 94 && XINT (chars) != 96)
1458 args_out_of_range_3 (chars, make_number (94), make_number (96));
1459 for (final_char = '0'; final_char <= '?'; final_char++)
1460 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1461 break;
1462 return (final_char <= '?' ? make_number (final_char) : Qnil);
1463 }
1464
1465 static void
1466 check_iso_charset_parameter (dimension, chars, final_char)
1467 Lisp_Object dimension, chars, final_char;
1468 {
1469 CHECK_NATNUM (dimension);
1470 CHECK_NATNUM (chars);
1471 CHECK_NATNUM (final_char);
1472
1473 if (XINT (dimension) > 3)
1474 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1475 if (XINT (chars) != 94 && XINT (chars) != 96)
1476 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1477 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1478 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1479 }
1480
1481
1482 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1483 4, 4, 0,
1484 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1485
1486 On decoding by an ISO-2022 base coding system, when a charset
1487 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1488 if CHARSET is designated instead. */)
1489 (dimension, chars, final_char, charset)
1490 Lisp_Object dimension, chars, final_char, charset;
1491 {
1492 int id;
1493 int chars_flag;
1494
1495 CHECK_CHARSET_GET_ID (charset, id);
1496 check_iso_charset_parameter (dimension, chars, final_char);
1497 chars_flag = XINT (chars) == 96;
1498 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1499 return Qnil;
1500 }
1501
1502
1503 /* Return information about charsets in the text at PTR of NBYTES
1504 bytes, which are NCHARS characters. The value is:
1505
1506 0: Each character is represented by one byte. This is always
1507 true for a unibyte string. For a multibyte string, true if
1508 it contains only ASCII characters.
1509
1510 1: No charsets other than ascii, control-1, and latin-1 are
1511 found.
1512
1513 2: Otherwise.
1514 */
1515
1516 int
1517 string_xstring_p (string)
1518 Lisp_Object string;
1519 {
1520 const unsigned char *p = SDATA (string);
1521 const unsigned char *endp = p + SBYTES (string);
1522
1523 if (SCHARS (string) == SBYTES (string))
1524 return 0;
1525
1526 while (p < endp)
1527 {
1528 int c = STRING_CHAR_ADVANCE (p);
1529
1530 if (c >= 0x100)
1531 return 2;
1532 }
1533 return 1;
1534 }
1535
1536
1537 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1538
1539 CHARSETS is a vector. If Nth element is non-nil, it means the
1540 charset whose id is N is already found.
1541
1542 It may lookup a translation table TABLE if supplied. */
1543
1544 static void
1545 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1546 const unsigned char *ptr;
1547 EMACS_INT nchars, nbytes;
1548 Lisp_Object charsets, table;
1549 int multibyte;
1550 {
1551 const unsigned char *pend = ptr + nbytes;
1552
1553 if (nchars == nbytes)
1554 {
1555 if (multibyte)
1556 ASET (charsets, charset_ascii, Qt);
1557 else
1558 while (ptr < pend)
1559 {
1560 int c = *ptr++;
1561
1562 if (!NILP (table))
1563 c = translate_char (table, c);
1564 if (ASCII_BYTE_P (c))
1565 ASET (charsets, charset_ascii, Qt);
1566 else
1567 ASET (charsets, charset_eight_bit, Qt);
1568 }
1569 }
1570 else
1571 {
1572 while (ptr < pend)
1573 {
1574 int c = STRING_CHAR_ADVANCE (ptr);
1575 struct charset *charset;
1576
1577 if (!NILP (table))
1578 c = translate_char (table, c);
1579 charset = CHAR_CHARSET (c);
1580 ASET (charsets, CHARSET_ID (charset), Qt);
1581 }
1582 }
1583 }
1584
1585 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1586 2, 3, 0,
1587 doc: /* Return a list of charsets in the region between BEG and END.
1588 BEG and END are buffer positions.
1589 Optional arg TABLE if non-nil is a translation table to look up.
1590
1591 If the current buffer is unibyte, the returned list may contain
1592 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1593 (beg, end, table)
1594 Lisp_Object beg, end, table;
1595 {
1596 Lisp_Object charsets;
1597 EMACS_INT from, from_byte, to, stop, stop_byte;
1598 int i;
1599 Lisp_Object val;
1600 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1601
1602 validate_region (&beg, &end);
1603 from = XFASTINT (beg);
1604 stop = to = XFASTINT (end);
1605
1606 if (from < GPT && GPT < to)
1607 {
1608 stop = GPT;
1609 stop_byte = GPT_BYTE;
1610 }
1611 else
1612 stop_byte = CHAR_TO_BYTE (stop);
1613
1614 from_byte = CHAR_TO_BYTE (from);
1615
1616 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1617 while (1)
1618 {
1619 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1620 stop_byte - from_byte, charsets, table,
1621 multibyte);
1622 if (stop < to)
1623 {
1624 from = stop, from_byte = stop_byte;
1625 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1626 }
1627 else
1628 break;
1629 }
1630
1631 val = Qnil;
1632 for (i = charset_table_used - 1; i >= 0; i--)
1633 if (!NILP (AREF (charsets, i)))
1634 val = Fcons (CHARSET_NAME (charset_table + i), val);
1635 return val;
1636 }
1637
1638 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1639 1, 2, 0,
1640 doc: /* Return a list of charsets in STR.
1641 Optional arg TABLE if non-nil is a translation table to look up.
1642
1643 If STR is unibyte, the returned list may contain
1644 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1645 (str, table)
1646 Lisp_Object str, table;
1647 {
1648 Lisp_Object charsets;
1649 int i;
1650 Lisp_Object val;
1651
1652 CHECK_STRING (str);
1653
1654 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1655 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1656 charsets, table,
1657 STRING_MULTIBYTE (str));
1658 val = Qnil;
1659 for (i = charset_table_used - 1; i >= 0; i--)
1660 if (!NILP (AREF (charsets, i)))
1661 val = Fcons (CHARSET_NAME (charset_table + i), val);
1662 return val;
1663 }
1664
1665 \f
1666
1667 /* Return a unified character code for C (>= 0x110000). VAL is a
1668 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1669 charset symbol. */
1670 int
1671 maybe_unify_char (c, val)
1672 int c;
1673 Lisp_Object val;
1674 {
1675 struct charset *charset;
1676
1677 if (INTEGERP (val))
1678 return XINT (val);
1679 if (NILP (val))
1680 return c;
1681
1682 CHECK_CHARSET_GET_CHARSET (val, charset);
1683 load_charset (charset, 1);
1684 if (! inhibit_load_charset_map)
1685 {
1686 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1687 if (! NILP (val))
1688 c = XINT (val);
1689 }
1690 else
1691 {
1692 int code_index = c - CHARSET_CODE_OFFSET (charset);
1693 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1694
1695 if (unified > 0)
1696 c = unified;
1697 }
1698 return c;
1699 }
1700
1701
1702 /* Return a character correponding to the code-point CODE of
1703 CHARSET. */
1704
1705 int
1706 decode_char (charset, code)
1707 struct charset *charset;
1708 unsigned code;
1709 {
1710 int c, char_index;
1711 enum charset_method method = CHARSET_METHOD (charset);
1712
1713 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1714 return -1;
1715
1716 if (method == CHARSET_METHOD_SUBSET)
1717 {
1718 Lisp_Object subset_info;
1719
1720 subset_info = CHARSET_SUBSET (charset);
1721 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1722 code -= XINT (AREF (subset_info, 3));
1723 if (code < XFASTINT (AREF (subset_info, 1))
1724 || code > XFASTINT (AREF (subset_info, 2)))
1725 c = -1;
1726 else
1727 c = DECODE_CHAR (charset, code);
1728 }
1729 else if (method == CHARSET_METHOD_SUPERSET)
1730 {
1731 Lisp_Object parents;
1732
1733 parents = CHARSET_SUPERSET (charset);
1734 c = -1;
1735 for (; CONSP (parents); parents = XCDR (parents))
1736 {
1737 int id = XINT (XCAR (XCAR (parents)));
1738 int code_offset = XINT (XCDR (XCAR (parents)));
1739 unsigned this_code = code - code_offset;
1740
1741 charset = CHARSET_FROM_ID (id);
1742 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1743 break;
1744 }
1745 }
1746 else
1747 {
1748 char_index = CODE_POINT_TO_INDEX (charset, code);
1749 if (char_index < 0)
1750 return -1;
1751
1752 if (method == CHARSET_METHOD_MAP)
1753 {
1754 Lisp_Object decoder;
1755
1756 decoder = CHARSET_DECODER (charset);
1757 if (! VECTORP (decoder))
1758 {
1759 load_charset (charset, 1);
1760 decoder = CHARSET_DECODER (charset);
1761 }
1762 if (VECTORP (decoder))
1763 c = XINT (AREF (decoder, char_index));
1764 else
1765 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1766 }
1767 else /* method == CHARSET_METHOD_OFFSET */
1768 {
1769 c = char_index + CHARSET_CODE_OFFSET (charset);
1770 if (CHARSET_UNIFIED_P (charset)
1771 && c > MAX_UNICODE_CHAR)
1772 MAYBE_UNIFY_CHAR (c);
1773 }
1774 }
1775
1776 return c;
1777 }
1778
1779 /* Variable used temporarily by the macro ENCODE_CHAR. */
1780 Lisp_Object charset_work;
1781
1782 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1783 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1784 use CHARSET's strict_max_char instead of max_char. */
1785
1786 unsigned
1787 encode_char (charset, c)
1788 struct charset *charset;
1789 int c;
1790 {
1791 unsigned code;
1792 enum charset_method method = CHARSET_METHOD (charset);
1793
1794 if (CHARSET_UNIFIED_P (charset))
1795 {
1796 Lisp_Object deunifier, deunified;
1797 int code_index = -1;
1798
1799 deunifier = CHARSET_DEUNIFIER (charset);
1800 if (! CHAR_TABLE_P (deunifier))
1801 {
1802 load_charset (charset, 2);
1803 deunifier = CHARSET_DEUNIFIER (charset);
1804 }
1805 if (CHAR_TABLE_P (deunifier))
1806 {
1807 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1808
1809 if (INTEGERP (deunified))
1810 code_index = XINT (deunified);
1811 }
1812 else
1813 {
1814 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1815 }
1816 if (code_index >= 0)
1817 c = CHARSET_CODE_OFFSET (charset) + code_index;
1818 }
1819
1820 if (method == CHARSET_METHOD_SUBSET)
1821 {
1822 Lisp_Object subset_info;
1823 struct charset *this_charset;
1824
1825 subset_info = CHARSET_SUBSET (charset);
1826 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1827 code = ENCODE_CHAR (this_charset, c);
1828 if (code == CHARSET_INVALID_CODE (this_charset)
1829 || code < XFASTINT (AREF (subset_info, 1))
1830 || code > XFASTINT (AREF (subset_info, 2)))
1831 return CHARSET_INVALID_CODE (charset);
1832 code += XINT (AREF (subset_info, 3));
1833 return code;
1834 }
1835
1836 if (method == CHARSET_METHOD_SUPERSET)
1837 {
1838 Lisp_Object parents;
1839
1840 parents = CHARSET_SUPERSET (charset);
1841 for (; CONSP (parents); parents = XCDR (parents))
1842 {
1843 int id = XINT (XCAR (XCAR (parents)));
1844 int code_offset = XINT (XCDR (XCAR (parents)));
1845 struct charset *this_charset = CHARSET_FROM_ID (id);
1846
1847 code = ENCODE_CHAR (this_charset, c);
1848 if (code != CHARSET_INVALID_CODE (this_charset))
1849 return code + code_offset;
1850 }
1851 return CHARSET_INVALID_CODE (charset);
1852 }
1853
1854 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1855 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1856 return CHARSET_INVALID_CODE (charset);
1857
1858 if (method == CHARSET_METHOD_MAP)
1859 {
1860 Lisp_Object encoder;
1861 Lisp_Object val;
1862
1863 encoder = CHARSET_ENCODER (charset);
1864 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1865 {
1866 load_charset (charset, 2);
1867 encoder = CHARSET_ENCODER (charset);
1868 }
1869 if (CHAR_TABLE_P (encoder))
1870 {
1871 val = CHAR_TABLE_REF (encoder, c);
1872 if (NILP (val))
1873 return CHARSET_INVALID_CODE (charset);
1874 code = XINT (val);
1875 if (! CHARSET_COMPACT_CODES_P (charset))
1876 code = INDEX_TO_CODE_POINT (charset, code);
1877 }
1878 else
1879 {
1880 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1881 code = INDEX_TO_CODE_POINT (charset, code);
1882 }
1883 }
1884 else /* method == CHARSET_METHOD_OFFSET */
1885 {
1886 int code_index = c - CHARSET_CODE_OFFSET (charset);
1887
1888 code = INDEX_TO_CODE_POINT (charset, code_index);
1889 }
1890
1891 return code;
1892 }
1893
1894
1895 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1896 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1897 Return nil if CODE-POINT is not valid in CHARSET.
1898
1899 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1900
1901 Optional argument RESTRICTION specifies a way to map the pair of CCS
1902 and CODE-POINT to a character. Currently not supported and just ignored. */)
1903 (charset, code_point, restriction)
1904 Lisp_Object charset, code_point, restriction;
1905 {
1906 int c, id;
1907 unsigned code;
1908 struct charset *charsetp;
1909
1910 CHECK_CHARSET_GET_ID (charset, id);
1911 if (CONSP (code_point))
1912 {
1913 CHECK_NATNUM_CAR (code_point);
1914 CHECK_NATNUM_CDR (code_point);
1915 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1916 }
1917 else
1918 {
1919 CHECK_NATNUM (code_point);
1920 code = XINT (code_point);
1921 }
1922 charsetp = CHARSET_FROM_ID (id);
1923 c = DECODE_CHAR (charsetp, code);
1924 return (c >= 0 ? make_number (c) : Qnil);
1925 }
1926
1927
1928 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1929 doc: /* Encode the character CH into a code-point of CHARSET.
1930 Return nil if CHARSET doesn't include CH.
1931
1932 Optional argument RESTRICTION specifies a way to map CH to a
1933 code-point in CCS. Currently not supported and just ignored. */)
1934 (ch, charset, restriction)
1935 Lisp_Object ch, charset, restriction;
1936 {
1937 int id;
1938 unsigned code;
1939 struct charset *charsetp;
1940
1941 CHECK_CHARSET_GET_ID (charset, id);
1942 CHECK_NATNUM (ch);
1943 charsetp = CHARSET_FROM_ID (id);
1944 code = ENCODE_CHAR (charsetp, XINT (ch));
1945 if (code == CHARSET_INVALID_CODE (charsetp))
1946 return Qnil;
1947 if (code > 0x7FFFFFF)
1948 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1949 return make_number (code);
1950 }
1951
1952
1953 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1954 doc:
1955 /* Return a character of CHARSET whose position codes are CODEn.
1956
1957 CODE1 through CODE4 are optional, but if you don't supply sufficient
1958 position codes, it is assumed that the minimum code in each dimension
1959 is specified. */)
1960 (charset, code1, code2, code3, code4)
1961 Lisp_Object charset, code1, code2, code3, code4;
1962 {
1963 int id, dimension;
1964 struct charset *charsetp;
1965 unsigned code;
1966 int c;
1967
1968 CHECK_CHARSET_GET_ID (charset, id);
1969 charsetp = CHARSET_FROM_ID (id);
1970
1971 dimension = CHARSET_DIMENSION (charsetp);
1972 if (NILP (code1))
1973 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1974 ? 0 : CHARSET_MIN_CODE (charsetp));
1975 else
1976 {
1977 CHECK_NATNUM (code1);
1978 if (XFASTINT (code1) >= 0x100)
1979 args_out_of_range (make_number (0xFF), code1);
1980 code = XFASTINT (code1);
1981
1982 if (dimension > 1)
1983 {
1984 code <<= 8;
1985 if (NILP (code2))
1986 code |= charsetp->code_space[(dimension - 2) * 4];
1987 else
1988 {
1989 CHECK_NATNUM (code2);
1990 if (XFASTINT (code2) >= 0x100)
1991 args_out_of_range (make_number (0xFF), code2);
1992 code |= XFASTINT (code2);
1993 }
1994
1995 if (dimension > 2)
1996 {
1997 code <<= 8;
1998 if (NILP (code3))
1999 code |= charsetp->code_space[(dimension - 3) * 4];
2000 else
2001 {
2002 CHECK_NATNUM (code3);
2003 if (XFASTINT (code3) >= 0x100)
2004 args_out_of_range (make_number (0xFF), code3);
2005 code |= XFASTINT (code3);
2006 }
2007
2008 if (dimension > 3)
2009 {
2010 code <<= 8;
2011 if (NILP (code4))
2012 code |= charsetp->code_space[0];
2013 else
2014 {
2015 CHECK_NATNUM (code4);
2016 if (XFASTINT (code4) >= 0x100)
2017 args_out_of_range (make_number (0xFF), code4);
2018 code |= XFASTINT (code4);
2019 }
2020 }
2021 }
2022 }
2023 }
2024
2025 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2026 code &= 0x7F7F7F7F;
2027 c = DECODE_CHAR (charsetp, code);
2028 if (c < 0)
2029 error ("Invalid code(s)");
2030 return make_number (c);
2031 }
2032
2033
2034 /* Return the first charset in CHARSET_LIST that contains C.
2035 CHARSET_LIST is a list of charset IDs. If it is nil, use
2036 Vcharset_ordered_list. */
2037
2038 struct charset *
2039 char_charset (c, charset_list, code_return)
2040 int c;
2041 Lisp_Object charset_list;
2042 unsigned *code_return;
2043 {
2044 int maybe_null = 0;
2045
2046 if (NILP (charset_list))
2047 charset_list = Vcharset_ordered_list;
2048 else
2049 maybe_null = 1;
2050
2051 while (CONSP (charset_list))
2052 {
2053 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2054 unsigned code = ENCODE_CHAR (charset, c);
2055
2056 if (code != CHARSET_INVALID_CODE (charset))
2057 {
2058 if (code_return)
2059 *code_return = code;
2060 return charset;
2061 }
2062 charset_list = XCDR (charset_list);
2063 if (c <= MAX_UNICODE_CHAR
2064 && EQ (charset_list, Vcharset_non_preferred_head))
2065 return CHARSET_FROM_ID (charset_unicode);
2066 }
2067 return (maybe_null ? NULL
2068 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2069 : CHARSET_FROM_ID (charset_eight_bit));
2070 }
2071
2072
2073 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2074 doc:
2075 /*Return list of charset and one to four position-codes of CH.
2076 The charset is decided by the current priority order of charsets.
2077 A position-code is a byte value of each dimension of the code-point of
2078 CH in the charset. */)
2079 (ch)
2080 Lisp_Object ch;
2081 {
2082 struct charset *charset;
2083 int c, dimension;
2084 unsigned code;
2085 Lisp_Object val;
2086
2087 CHECK_CHARACTER (ch);
2088 c = XFASTINT (ch);
2089 charset = CHAR_CHARSET (c);
2090 if (! charset)
2091 abort ();
2092 code = ENCODE_CHAR (charset, c);
2093 if (code == CHARSET_INVALID_CODE (charset))
2094 abort ();
2095 dimension = CHARSET_DIMENSION (charset);
2096 for (val = Qnil; dimension > 0; dimension--)
2097 {
2098 val = Fcons (make_number (code & 0xFF), val);
2099 code >>= 8;
2100 }
2101 return Fcons (CHARSET_NAME (charset), val);
2102 }
2103
2104
2105 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2106 doc: /* Return the charset of highest priority that contains CH.
2107 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2108 from which to find the charset. It may also be a coding system. In
2109 that case, find the charset from what supported by that coding system. */)
2110 (ch, restriction)
2111 Lisp_Object ch, restriction;
2112 {
2113 struct charset *charset;
2114
2115 CHECK_CHARACTER (ch);
2116 if (NILP (restriction))
2117 charset = CHAR_CHARSET (XINT (ch));
2118 else
2119 {
2120 Lisp_Object charset_list;
2121
2122 if (CONSP (restriction))
2123 {
2124 for (charset_list = Qnil; CONSP (restriction);
2125 restriction = XCDR (restriction))
2126 {
2127 int id;
2128
2129 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2130 charset_list = Fcons (make_number (id), charset_list);
2131 }
2132 charset_list = Fnreverse (charset_list);
2133 }
2134 else
2135 charset_list = coding_system_charset_list (restriction);
2136 charset = char_charset (XINT (ch), charset_list, NULL);
2137 if (! charset)
2138 return Qnil;
2139 }
2140 return (CHARSET_NAME (charset));
2141 }
2142
2143
2144 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2145 doc: /*
2146 Return charset of a character in the current buffer at position POS.
2147 If POS is nil, it defauls to the current point.
2148 If POS is out of range, the value is nil. */)
2149 (pos)
2150 Lisp_Object pos;
2151 {
2152 Lisp_Object ch;
2153 struct charset *charset;
2154
2155 ch = Fchar_after (pos);
2156 if (! INTEGERP (ch))
2157 return ch;
2158 charset = CHAR_CHARSET (XINT (ch));
2159 return (CHARSET_NAME (charset));
2160 }
2161
2162
2163 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2164 doc: /*
2165 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2166
2167 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2168 by their DIMENSION, CHARS, and FINAL-CHAR,
2169 whereas Emacs distinguishes them by charset symbol.
2170 See the documentation of the function `charset-info' for the meanings of
2171 DIMENSION, CHARS, and FINAL-CHAR. */)
2172 (dimension, chars, final_char)
2173 Lisp_Object dimension, chars, final_char;
2174 {
2175 int id;
2176 int chars_flag;
2177
2178 check_iso_charset_parameter (dimension, chars, final_char);
2179 chars_flag = XFASTINT (chars) == 96;
2180 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2181 XFASTINT (final_char));
2182 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2183 }
2184
2185
2186 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2187 0, 0, 0,
2188 doc: /*
2189 Internal use only.
2190 Clear temporary charset mapping tables.
2191 It should be called only from temacs invoked for dumping. */)
2192 ()
2193 {
2194 int i;
2195 struct charset *charset;
2196 Lisp_Object attrs;
2197
2198 if (temp_charset_work)
2199 {
2200 free (temp_charset_work);
2201 temp_charset_work = NULL;
2202 }
2203
2204 if (CHAR_TABLE_P (Vchar_unify_table))
2205 Foptimize_char_table (Vchar_unify_table, Qnil);
2206
2207 return Qnil;
2208 }
2209
2210 DEFUN ("charset-priority-list", Fcharset_priority_list,
2211 Scharset_priority_list, 0, 1, 0,
2212 doc: /* Return the list of charsets ordered by priority.
2213 HIGHESTP non-nil means just return the highest priority one. */)
2214 (highestp)
2215 Lisp_Object highestp;
2216 {
2217 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2218
2219 if (!NILP (highestp))
2220 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2221
2222 while (!NILP (list))
2223 {
2224 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2225 list = XCDR (list);
2226 }
2227 return Fnreverse (val);
2228 }
2229
2230 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2231 1, MANY, 0,
2232 doc: /* Assign higher priority to the charsets given as arguments.
2233 usage: (set-charset-priority &rest charsets) */)
2234 (nargs, args)
2235 int nargs;
2236 Lisp_Object *args;
2237 {
2238 Lisp_Object new_head, old_list, arglist[2];
2239 Lisp_Object list_2022, list_emacs_mule;
2240 int i, id;
2241
2242 old_list = Fcopy_sequence (Vcharset_ordered_list);
2243 new_head = Qnil;
2244 for (i = 0; i < nargs; i++)
2245 {
2246 CHECK_CHARSET_GET_ID (args[i], id);
2247 if (! NILP (Fmemq (make_number (id), old_list)))
2248 {
2249 old_list = Fdelq (make_number (id), old_list);
2250 new_head = Fcons (make_number (id), new_head);
2251 }
2252 }
2253 arglist[0] = Fnreverse (new_head);
2254 arglist[1] = Vcharset_non_preferred_head = old_list;
2255 Vcharset_ordered_list = Fnconc (2, arglist);
2256 charset_ordered_list_tick++;
2257
2258 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2259 CONSP (old_list); old_list = XCDR (old_list))
2260 {
2261 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2262 list_2022 = Fcons (XCAR (old_list), list_2022);
2263 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2264 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2265 }
2266 Viso_2022_charset_list = Fnreverse (list_2022);
2267 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2268
2269 return Qnil;
2270 }
2271
2272 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2273 0, 1, 0,
2274 doc: /* Internal use only.
2275 Return charset identification number of CHARSET. */)
2276 (charset)
2277 Lisp_Object charset;
2278 {
2279 int id;
2280
2281 CHECK_CHARSET_GET_ID (charset, id);
2282 return make_number (id);
2283 }
2284
2285 \f
2286 void
2287 init_charset ()
2288 {
2289 Lisp_Object tempdir;
2290 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2291 if (access (SDATA (tempdir), 0) < 0)
2292 {
2293 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2294 Emacs will not function correctly without the character map files.\n\
2295 Please check your installation!\n",
2296 tempdir);
2297 /* TODO should this be a fatal error? (Bug#909) */
2298 }
2299
2300 Vcharset_map_path = Fcons (tempdir, Qnil);
2301 }
2302
2303
2304 void
2305 init_charset_once ()
2306 {
2307 int i, j, k;
2308
2309 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2310 for (j = 0; j < ISO_MAX_CHARS; j++)
2311 for (k = 0; k < ISO_MAX_FINAL; k++)
2312 iso_charset_table[i][j][k] = -1;
2313
2314 for (i = 0; i < 256; i++)
2315 emacs_mule_charset[i] = NULL;
2316
2317 charset_jisx0201_roman = -1;
2318 charset_jisx0208_1978 = -1;
2319 charset_jisx0208 = -1;
2320
2321 for (i = 0; i < 128; i++)
2322 unibyte_to_multibyte_table[i] = i;
2323 for (; i < 256; i++)
2324 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2325 }
2326
2327 #ifdef emacs
2328
2329 void
2330 syms_of_charset ()
2331 {
2332 DEFSYM (Qcharsetp, "charsetp");
2333
2334 DEFSYM (Qascii, "ascii");
2335 DEFSYM (Qunicode, "unicode");
2336 DEFSYM (Qemacs, "emacs");
2337 DEFSYM (Qeight_bit, "eight-bit");
2338 DEFSYM (Qiso_8859_1, "iso-8859-1");
2339
2340 DEFSYM (Qgl, "gl");
2341 DEFSYM (Qgr, "gr");
2342
2343 staticpro (&Vcharset_ordered_list);
2344 Vcharset_ordered_list = Qnil;
2345
2346 staticpro (&Viso_2022_charset_list);
2347 Viso_2022_charset_list = Qnil;
2348
2349 staticpro (&Vemacs_mule_charset_list);
2350 Vemacs_mule_charset_list = Qnil;
2351
2352 /* Don't staticpro them here. It's done in syms_of_fns. */
2353 QCtest = intern (":test");
2354 Qeq = intern ("eq");
2355
2356 staticpro (&Vcharset_hash_table);
2357 {
2358 Lisp_Object args[2];
2359 args[0] = QCtest;
2360 args[1] = Qeq;
2361 Vcharset_hash_table = Fmake_hash_table (2, args);
2362 }
2363
2364 charset_table_size = 128;
2365 charset_table = ((struct charset *)
2366 xmalloc (sizeof (struct charset) * charset_table_size));
2367 charset_table_used = 0;
2368
2369 defsubr (&Scharsetp);
2370 defsubr (&Smap_charset_chars);
2371 defsubr (&Sdefine_charset_internal);
2372 defsubr (&Sdefine_charset_alias);
2373 defsubr (&Scharset_plist);
2374 defsubr (&Sset_charset_plist);
2375 defsubr (&Sunify_charset);
2376 defsubr (&Sget_unused_iso_final_char);
2377 defsubr (&Sdeclare_equiv_charset);
2378 defsubr (&Sfind_charset_region);
2379 defsubr (&Sfind_charset_string);
2380 defsubr (&Sdecode_char);
2381 defsubr (&Sencode_char);
2382 defsubr (&Ssplit_char);
2383 defsubr (&Smake_char);
2384 defsubr (&Schar_charset);
2385 defsubr (&Scharset_after);
2386 defsubr (&Siso_charset);
2387 defsubr (&Sclear_charset_maps);
2388 defsubr (&Scharset_priority_list);
2389 defsubr (&Sset_charset_priority);
2390 defsubr (&Scharset_id_internal);
2391
2392 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2393 doc: /* *List of directories to search for charset map files. */);
2394 Vcharset_map_path = Qnil;
2395
2396 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2397 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2398 inhibit_load_charset_map = 0;
2399
2400 DEFVAR_LISP ("charset-list", &Vcharset_list,
2401 doc: /* List of all charsets ever defined. */);
2402 Vcharset_list = Qnil;
2403
2404 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2405 doc: /* ISO639 language mnemonic symbol for the current language environment.
2406 If the current language environment is for multiple languages (e.g. "Latin-1"),
2407 the value may be a list of mnemonics. */);
2408 Vcurrent_iso639_language = Qnil;
2409
2410 charset_ascii
2411 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2412 0, 127, 'B', -1, 0, 1, 0, 0);
2413 charset_iso_8859_1
2414 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2415 0, 255, -1, -1, -1, 1, 0, 0);
2416 charset_unicode
2417 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2418 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2419 charset_emacs
2420 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2421 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2422 charset_eight_bit
2423 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2424 128, 255, -1, 0, -1, 0, 1,
2425 MAX_5_BYTE_CHAR + 1);
2426 }
2427
2428 #endif /* emacs */
2429
2430 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2431 (do not change this comment) */