Fix typos in comment.
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
78
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
85
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
103
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
108
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
111
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
114
115 struct charset *emacs_mule_charset[256];
116
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121 Lisp_Object Vcharset_map_path;
122
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map;
125
126 Lisp_Object Vcurrent_iso639_language;
127
128 /* Defined in chartab.c */
129 extern void
130 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
151
152
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
169
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
172
173 static struct
174 {
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203 } *temp_charset_work;
204
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
227 \f
228
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded;
232
233 struct charset_map_entries
234 {
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240 };
241
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
251
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
257
258 If CONTROL_FLAG is 2, setup the following tables.
259
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265 */
266
267 static void
268 load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
273 {
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
280
281 if (n_entries <= 0)
282 return;
283
284 if (control_flag)
285 {
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
293
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
333 charset_map_loaded = 1;
334 }
335
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
339 {
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
344
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
352 {
353 to_index = from_index;
354 to_c = from_c;
355 }
356 else
357 {
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
360 }
361 if (from_index < 0 || to_index < 0)
362 continue;
363
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
368
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
418
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
421 }
422 }
423
424 if (control_flag == 0)
425 {
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
429 }
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
435 }
436
437
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
440
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
445 {
446 int c;
447 unsigned n;
448
449 while ((c = getc (fp)) != EOF)
450 {
451 if (c == '#')
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
460 }
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
478 }
479
480 extern Lisp_Object Qfile_name_handler_alist;
481
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
484 0xAAAA 0xCCCC
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
487 0xAAAA-0xBBBB 0xCCCC
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
490
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
494 range.
495
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
498
499 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
500
501 static void
502 load_charset_map_from_file (charset, mapfile, control_flag)
503 struct charset *charset;
504 Lisp_Object mapfile;
505 int control_flag;
506 {
507 unsigned min_code = CHARSET_MIN_CODE (charset);
508 unsigned max_code = CHARSET_MAX_CODE (charset);
509 int fd;
510 FILE *fp;
511 int eof;
512 Lisp_Object suffixes;
513 struct charset_map_entries *head, *entries;
514 int n_entries;
515 int count = SPECPDL_INDEX ();
516
517 suffixes = Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil));
519
520 specbind (Qfile_name_handler_alist, Qnil);
521 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
522 unbind_to (count, Qnil);
523 if (fd < 0
524 || ! (fp = fdopen (fd, "r")))
525 {
526 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
527 return;
528 }
529
530 head = entries = ((struct charset_map_entries *)
531 alloca (sizeof (struct charset_map_entries)));
532 n_entries = 0;
533 eof = 0;
534 while (1)
535 {
536 unsigned from, to;
537 int c;
538 int idx;
539
540 from = read_hex (fp, &eof);
541 if (eof)
542 break;
543 if (getc (fp) == '-')
544 to = read_hex (fp, &eof);
545 else
546 to = from;
547 c = (int) read_hex (fp, &eof);
548
549 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
550 continue;
551
552 if (n_entries > 0 && (n_entries % 0x10000) == 0)
553 {
554 entries->next = ((struct charset_map_entries *)
555 alloca (sizeof (struct charset_map_entries)));
556 entries = entries->next;
557 }
558 idx = n_entries % 0x10000;
559 entries->entry[idx].from = from;
560 entries->entry[idx].to = to;
561 entries->entry[idx].c = c;
562 n_entries++;
563 }
564 fclose (fp);
565 close (fd);
566
567 load_charset_map (charset, head, n_entries, control_flag);
568 }
569
570 static void
571 load_charset_map_from_vector (charset, vec, control_flag)
572 struct charset *charset;
573 Lisp_Object vec;
574 int control_flag;
575 {
576 unsigned min_code = CHARSET_MIN_CODE (charset);
577 unsigned max_code = CHARSET_MAX_CODE (charset);
578 struct charset_map_entries *head, *entries;
579 int n_entries;
580 int len = ASIZE (vec);
581 int i;
582
583 if (len % 2 == 1)
584 {
585 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
586 return;
587 }
588
589 head = entries = ((struct charset_map_entries *)
590 alloca (sizeof (struct charset_map_entries)));
591 n_entries = 0;
592 for (i = 0; i < len; i += 2)
593 {
594 Lisp_Object val, val2;
595 unsigned from, to;
596 int c;
597 int idx;
598
599 val = AREF (vec, i);
600 if (CONSP (val))
601 {
602 val2 = XCDR (val);
603 val = XCAR (val);
604 CHECK_NATNUM (val);
605 CHECK_NATNUM (val2);
606 from = XFASTINT (val);
607 to = XFASTINT (val2);
608 }
609 else
610 {
611 CHECK_NATNUM (val);
612 from = to = XFASTINT (val);
613 }
614 val = AREF (vec, i + 1);
615 CHECK_NATNUM (val);
616 c = XFASTINT (val);
617
618 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
619 continue;
620
621 if (n_entries > 0 && (n_entries % 0x10000) == 0)
622 {
623 entries->next = ((struct charset_map_entries *)
624 alloca (sizeof (struct charset_map_entries)));
625 entries = entries->next;
626 }
627 idx = n_entries % 0x10000;
628 entries->entry[idx].from = from;
629 entries->entry[idx].to = to;
630 entries->entry[idx].c = c;
631 n_entries++;
632 }
633
634 load_charset_map (charset, head, n_entries, control_flag);
635 }
636
637
638 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
639 map it is (see the comment of load_charset_map for the detail). */
640
641 static void
642 load_charset (charset, control_flag)
643 struct charset *charset;
644 int control_flag;
645 {
646 Lisp_Object map;
647
648 if (inhibit_load_charset_map
649 && temp_charset_work
650 && charset == temp_charset_work->current
651 && (control_flag == 2 == temp_charset_work->for_encoder))
652 return;
653
654 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
655 map = CHARSET_MAP (charset);
656 else if (CHARSET_UNIFIED_P (charset))
657 map = CHARSET_UNIFY_MAP (charset);
658 if (STRINGP (map))
659 load_charset_map_from_file (charset, map, control_flag);
660 else
661 load_charset_map_from_vector (charset, map, control_flag);
662 }
663
664
665 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
666 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
667 (object)
668 Lisp_Object object;
669 {
670 return (CHARSETP (object) ? Qt : Qnil);
671 }
672
673
674 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
675 Lisp_Object function, Lisp_Object arg,
676 unsigned from, unsigned to));
677
678 void
679 map_charset_for_dump (c_function, function, arg, from, to)
680 void (*c_function) (Lisp_Object, Lisp_Object);
681 Lisp_Object function, arg;
682 unsigned from, to;
683 {
684 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
685 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
686 Lisp_Object range;
687 int c, stop;
688 struct gcpro gcpro1;
689
690 range = Fcons (Qnil, Qnil);
691 GCPRO1 (range);
692
693 c = temp_charset_work->min_char;
694 stop = (temp_charset_work->max_char < 0x20000
695 ? temp_charset_work->max_char : 0xFFFF);
696
697 while (1)
698 {
699 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
700
701 if (index >= from_idx && index <= to_idx)
702 {
703 if (NILP (XCAR (range)))
704 XSETCAR (range, make_number (c));
705 }
706 else if (! NILP (XCAR (range)))
707 {
708 XSETCDR (range, make_number (c - 1));
709 if (c_function)
710 (*c_function) (arg, range);
711 else
712 call2 (function, range, arg);
713 XSETCAR (range, Qnil);
714 }
715 if (c == stop)
716 {
717 if (c == temp_charset_work->max_char)
718 {
719 if (! NILP (XCAR (range)))
720 {
721 XSETCDR (range, make_number (c));
722 if (c_function)
723 (*c_function) (arg, range);
724 else
725 call2 (function, range, arg);
726 }
727 break;
728 }
729 c = 0x1FFFF;
730 stop = temp_charset_work->max_char;
731 }
732 c++;
733 }
734 }
735
736 void
737 map_charset_chars (c_function, function, arg,
738 charset, from, to)
739 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
740 Lisp_Object function, arg;
741 struct charset *charset;
742 unsigned from, to;
743 {
744 Lisp_Object range;
745 int partial;
746
747 partial = (from > CHARSET_MIN_CODE (charset)
748 || to < CHARSET_MAX_CODE (charset));
749
750 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
751 {
752 int from_idx = CODE_POINT_TO_INDEX (charset, from);
753 int to_idx = CODE_POINT_TO_INDEX (charset, to);
754 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
755 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
756
757 if (CHARSET_UNIFIED_P (charset))
758 {
759 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
760 load_charset (charset, 2);
761 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
762 map_char_table_for_charset (c_function, function,
763 CHARSET_DEUNIFIER (charset), arg,
764 partial ? charset : NULL, from, to);
765 else
766 map_charset_for_dump (c_function, function, arg, from, to);
767 }
768
769 range = Fcons (make_number (from_c), make_number (to_c));
770 if (NILP (function))
771 (*c_function) (arg, range);
772 else
773 call2 (function, range, arg);
774 }
775 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
776 {
777 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
778 load_charset (charset, 2);
779 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
780 map_char_table_for_charset (c_function, function,
781 CHARSET_ENCODER (charset), arg,
782 partial ? charset : NULL, from, to);
783 else
784 map_charset_for_dump (c_function, function, arg, from, to);
785 }
786 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
787 {
788 Lisp_Object subset_info;
789 int offset;
790
791 subset_info = CHARSET_SUBSET (charset);
792 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
793 offset = XINT (AREF (subset_info, 3));
794 from -= offset;
795 if (from < XFASTINT (AREF (subset_info, 1)))
796 from = XFASTINT (AREF (subset_info, 1));
797 to -= offset;
798 if (to > XFASTINT (AREF (subset_info, 2)))
799 to = XFASTINT (AREF (subset_info, 2));
800 map_charset_chars (c_function, function, arg, charset, from, to);
801 }
802 else /* i.e. CHARSET_METHOD_SUPERSET */
803 {
804 Lisp_Object parents;
805
806 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
807 parents = XCDR (parents))
808 {
809 int offset;
810 unsigned this_from, this_to;
811
812 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
813 offset = XINT (XCDR (XCAR (parents)));
814 this_from = from - offset;
815 this_to = to - offset;
816 if (this_from < CHARSET_MIN_CODE (charset))
817 this_from = CHARSET_MIN_CODE (charset);
818 if (this_to > CHARSET_MAX_CODE (charset))
819 this_to = CHARSET_MAX_CODE (charset);
820 map_charset_chars (c_function, function, arg, charset,
821 this_from, this_to);
822 }
823 }
824 }
825
826 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
827 doc: /* Call FUNCTION for all characters in CHARSET.
828 FUNCTION is called with an argument RANGE and the optional 3rd
829 argument ARG.
830
831 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
832 characters contained in CHARSET.
833
834 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
835 range of code points of target characters. */)
836 (function, charset, arg, from_code, to_code)
837 Lisp_Object function, charset, arg, from_code, to_code;
838 {
839 struct charset *cs;
840 unsigned from, to;
841
842 CHECK_CHARSET_GET_CHARSET (charset, cs);
843 if (NILP (from_code))
844 from = CHARSET_MIN_CODE (cs);
845 else
846 {
847 CHECK_NATNUM (from_code);
848 from = XINT (from_code);
849 if (from < CHARSET_MIN_CODE (cs))
850 from = CHARSET_MIN_CODE (cs);
851 }
852 if (NILP (to_code))
853 to = CHARSET_MAX_CODE (cs);
854 else
855 {
856 CHECK_NATNUM (to_code);
857 to = XINT (to_code);
858 if (to > CHARSET_MAX_CODE (cs))
859 to = CHARSET_MAX_CODE (cs);
860 }
861 map_charset_chars (NULL, function, arg, cs, from, to);
862 return Qnil;
863 }
864
865
866 /* Define a charset according to the arguments. The Nth argument is
867 the Nth attribute of the charset (the last attribute `charset-id'
868 is not included). See the docstring of `define-charset' for the
869 detail. */
870
871 DEFUN ("define-charset-internal", Fdefine_charset_internal,
872 Sdefine_charset_internal, charset_arg_max, MANY, 0,
873 doc: /* For internal use only.
874 usage: (define-charset-internal ...) */)
875 (nargs, args)
876 int nargs;
877 Lisp_Object *args;
878 {
879 /* Charset attr vector. */
880 Lisp_Object attrs;
881 Lisp_Object val;
882 unsigned hash_code;
883 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
884 int i, j;
885 struct charset charset;
886 int id;
887 int dimension;
888 int new_definition_p;
889 int nchars;
890
891 if (nargs != charset_arg_max)
892 return Fsignal (Qwrong_number_of_arguments,
893 Fcons (intern ("define-charset-internal"),
894 make_number (nargs)));
895
896 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
897
898 CHECK_SYMBOL (args[charset_arg_name]);
899 ASET (attrs, charset_name, args[charset_arg_name]);
900
901 val = args[charset_arg_code_space];
902 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
903 {
904 int min_byte, max_byte;
905
906 min_byte = XINT (Faref (val, make_number (i * 2)));
907 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
908 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
909 error ("Invalid :code-space value");
910 charset.code_space[i * 4] = min_byte;
911 charset.code_space[i * 4 + 1] = max_byte;
912 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
913 nchars *= charset.code_space[i * 4 + 2];
914 charset.code_space[i * 4 + 3] = nchars;
915 if (max_byte > 0)
916 dimension = i + 1;
917 }
918
919 val = args[charset_arg_dimension];
920 if (NILP (val))
921 charset.dimension = dimension;
922 else
923 {
924 CHECK_NATNUM (val);
925 charset.dimension = XINT (val);
926 if (charset.dimension < 1 || charset.dimension > 4)
927 args_out_of_range_3 (val, make_number (1), make_number (4));
928 }
929
930 charset.code_linear_p
931 = (charset.dimension == 1
932 || (charset.code_space[2] == 256
933 && (charset.dimension == 2
934 || (charset.code_space[6] == 256
935 && (charset.dimension == 3
936 || charset.code_space[10] == 256)))));
937
938 if (! charset.code_linear_p)
939 {
940 charset.code_space_mask = (unsigned char *) xmalloc (256);
941 bzero (charset.code_space_mask, 256);
942 for (i = 0; i < 4; i++)
943 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
944 j++)
945 charset.code_space_mask[j] |= (1 << i);
946 }
947
948 charset.iso_chars_96 = charset.code_space[2] == 96;
949
950 charset.min_code = (charset.code_space[0]
951 | (charset.code_space[4] << 8)
952 | (charset.code_space[8] << 16)
953 | (charset.code_space[12] << 24));
954 charset.max_code = (charset.code_space[1]
955 | (charset.code_space[5] << 8)
956 | (charset.code_space[9] << 16)
957 | (charset.code_space[13] << 24));
958 charset.char_index_offset = 0;
959
960 val = args[charset_arg_min_code];
961 if (! NILP (val))
962 {
963 unsigned code;
964
965 if (INTEGERP (val))
966 code = XINT (val);
967 else
968 {
969 CHECK_CONS (val);
970 CHECK_NUMBER_CAR (val);
971 CHECK_NUMBER_CDR (val);
972 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
973 }
974 if (code < charset.min_code
975 || code > charset.max_code)
976 args_out_of_range_3 (make_number (charset.min_code),
977 make_number (charset.max_code), val);
978 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
979 charset.min_code = code;
980 }
981
982 val = args[charset_arg_max_code];
983 if (! NILP (val))
984 {
985 unsigned code;
986
987 if (INTEGERP (val))
988 code = XINT (val);
989 else
990 {
991 CHECK_CONS (val);
992 CHECK_NUMBER_CAR (val);
993 CHECK_NUMBER_CDR (val);
994 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
995 }
996 if (code < charset.min_code
997 || code > charset.max_code)
998 args_out_of_range_3 (make_number (charset.min_code),
999 make_number (charset.max_code), val);
1000 charset.max_code = code;
1001 }
1002
1003 charset.compact_codes_p = charset.max_code < 0x10000;
1004
1005 val = args[charset_arg_invalid_code];
1006 if (NILP (val))
1007 {
1008 if (charset.min_code > 0)
1009 charset.invalid_code = 0;
1010 else
1011 {
1012 XSETINT (val, charset.max_code + 1);
1013 if (XINT (val) == charset.max_code + 1)
1014 charset.invalid_code = charset.max_code + 1;
1015 else
1016 error ("Attribute :invalid-code must be specified");
1017 }
1018 }
1019 else
1020 {
1021 CHECK_NATNUM (val);
1022 charset.invalid_code = XFASTINT (val);
1023 }
1024
1025 val = args[charset_arg_iso_final];
1026 if (NILP (val))
1027 charset.iso_final = -1;
1028 else
1029 {
1030 CHECK_NUMBER (val);
1031 if (XINT (val) < '0' || XINT (val) > 127)
1032 error ("Invalid iso-final-char: %d", XINT (val));
1033 charset.iso_final = XINT (val);
1034 }
1035
1036 val = args[charset_arg_iso_revision];
1037 if (NILP (val))
1038 charset.iso_revision = -1;
1039 else
1040 {
1041 CHECK_NUMBER (val);
1042 if (XINT (val) > 63)
1043 args_out_of_range (make_number (63), val);
1044 charset.iso_revision = XINT (val);
1045 }
1046
1047 val = args[charset_arg_emacs_mule_id];
1048 if (NILP (val))
1049 charset.emacs_mule_id = -1;
1050 else
1051 {
1052 CHECK_NATNUM (val);
1053 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1054 error ("Invalid emacs-mule-id: %d", XINT (val));
1055 charset.emacs_mule_id = XINT (val);
1056 }
1057
1058 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1059
1060 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1061
1062 charset.unified_p = 0;
1063
1064 bzero (charset.fast_map, sizeof (charset.fast_map));
1065
1066 if (! NILP (args[charset_arg_code_offset]))
1067 {
1068 val = args[charset_arg_code_offset];
1069 CHECK_NUMBER (val);
1070
1071 charset.method = CHARSET_METHOD_OFFSET;
1072 charset.code_offset = XINT (val);
1073
1074 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1075 charset.min_char = i + charset.code_offset;
1076 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1077 charset.max_char = i + charset.code_offset;
1078 if (charset.max_char > MAX_CHAR)
1079 error ("Unsupported max char: %d", charset.max_char);
1080
1081 i = (charset.min_char >> 7) << 7;
1082 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1083 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1084 i = (i >> 12) << 12;
1085 for (; i <= charset.max_char; i += 0x1000)
1086 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1087 }
1088 else if (! NILP (args[charset_arg_map]))
1089 {
1090 val = args[charset_arg_map];
1091 ASET (attrs, charset_map, val);
1092 charset.method = CHARSET_METHOD_MAP;
1093 }
1094 else if (! NILP (args[charset_arg_subset]))
1095 {
1096 Lisp_Object parent;
1097 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1098 struct charset *parent_charset;
1099
1100 val = args[charset_arg_subset];
1101 parent = Fcar (val);
1102 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1103 parent_min_code = Fnth (make_number (1), val);
1104 CHECK_NATNUM (parent_min_code);
1105 parent_max_code = Fnth (make_number (2), val);
1106 CHECK_NATNUM (parent_max_code);
1107 parent_code_offset = Fnth (make_number (3), val);
1108 CHECK_NUMBER (parent_code_offset);
1109 val = Fmake_vector (make_number (4), Qnil);
1110 ASET (val, 0, make_number (parent_charset->id));
1111 ASET (val, 1, parent_min_code);
1112 ASET (val, 2, parent_max_code);
1113 ASET (val, 3, parent_code_offset);
1114 ASET (attrs, charset_subset, val);
1115
1116 charset.method = CHARSET_METHOD_SUBSET;
1117 /* Here, we just copy the parent's fast_map. It's not accurate,
1118 but at least it works for quickly detecting which character
1119 DOESN'T belong to this charset. */
1120 for (i = 0; i < 190; i++)
1121 charset.fast_map[i] = parent_charset->fast_map[i];
1122
1123 /* We also copy these for parents. */
1124 charset.min_char = parent_charset->min_char;
1125 charset.max_char = parent_charset->max_char;
1126 }
1127 else if (! NILP (args[charset_arg_superset]))
1128 {
1129 val = args[charset_arg_superset];
1130 charset.method = CHARSET_METHOD_SUPERSET;
1131 val = Fcopy_sequence (val);
1132 ASET (attrs, charset_superset, val);
1133
1134 charset.min_char = MAX_CHAR;
1135 charset.max_char = 0;
1136 for (; ! NILP (val); val = Fcdr (val))
1137 {
1138 Lisp_Object elt, car_part, cdr_part;
1139 int this_id, offset;
1140 struct charset *this_charset;
1141
1142 elt = Fcar (val);
1143 if (CONSP (elt))
1144 {
1145 car_part = XCAR (elt);
1146 cdr_part = XCDR (elt);
1147 CHECK_CHARSET_GET_ID (car_part, this_id);
1148 CHECK_NUMBER (cdr_part);
1149 offset = XINT (cdr_part);
1150 }
1151 else
1152 {
1153 CHECK_CHARSET_GET_ID (elt, this_id);
1154 offset = 0;
1155 }
1156 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1157
1158 this_charset = CHARSET_FROM_ID (this_id);
1159 if (charset.min_char > this_charset->min_char)
1160 charset.min_char = this_charset->min_char;
1161 if (charset.max_char < this_charset->max_char)
1162 charset.max_char = this_charset->max_char;
1163 for (i = 0; i < 190; i++)
1164 charset.fast_map[i] |= this_charset->fast_map[i];
1165 }
1166 }
1167 else
1168 error ("None of :code-offset, :map, :parents are specified");
1169
1170 val = args[charset_arg_unify_map];
1171 if (! NILP (val) && !STRINGP (val))
1172 CHECK_VECTOR (val);
1173 ASET (attrs, charset_unify_map, val);
1174
1175 CHECK_LIST (args[charset_arg_plist]);
1176 ASET (attrs, charset_plist, args[charset_arg_plist]);
1177
1178 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1179 &hash_code);
1180 if (charset.hash_index >= 0)
1181 {
1182 new_definition_p = 0;
1183 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1184 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1185 }
1186 else
1187 {
1188 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1189 hash_code);
1190 if (charset_table_used == charset_table_size)
1191 {
1192 struct charset *new_table
1193 = (struct charset *) xmalloc (sizeof (struct charset)
1194 * (charset_table_size + 16));
1195 bcopy (charset_table, new_table,
1196 sizeof (struct charset) * charset_table_size);
1197 charset_table_size += 16;
1198 charset_table = new_table;
1199 }
1200 id = charset_table_used++;
1201 new_definition_p = 1;
1202 }
1203
1204 ASET (attrs, charset_id, make_number (id));
1205 charset.id = id;
1206 charset_table[id] = charset;
1207
1208 if (charset.method == CHARSET_METHOD_MAP)
1209 {
1210 load_charset (&charset, 0);
1211 charset_table[id] = charset;
1212 }
1213
1214 if (charset.iso_final >= 0)
1215 {
1216 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1217 charset.iso_final) = id;
1218 if (new_definition_p)
1219 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1220 Fcons (make_number (id), Qnil));
1221 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1222 charset_jisx0201_roman = id;
1223 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1224 charset_jisx0208_1978 = id;
1225 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1226 charset_jisx0208 = id;
1227 }
1228
1229 if (charset.emacs_mule_id >= 0)
1230 {
1231 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1232 if (charset.emacs_mule_id < 0xA0)
1233 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1234 else
1235 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1236 if (new_definition_p)
1237 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1238 Fcons (make_number (id), Qnil));
1239 }
1240
1241 if (new_definition_p)
1242 {
1243 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1244 if (charset.supplementary_p)
1245 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1246 Fcons (make_number (id), Qnil));
1247 else
1248 {
1249 Lisp_Object tail;
1250
1251 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1252 {
1253 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1254
1255 if (cs->supplementary_p)
1256 break;
1257 }
1258 if (EQ (tail, Vcharset_ordered_list))
1259 Vcharset_ordered_list = Fcons (make_number (id),
1260 Vcharset_ordered_list);
1261 else if (NILP (tail))
1262 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1263 Fcons (make_number (id), Qnil));
1264 else
1265 {
1266 val = Fcons (XCAR (tail), XCDR (tail));
1267 XSETCDR (tail, val);
1268 XSETCAR (tail, make_number (id));
1269 }
1270 }
1271 charset_ordered_list_tick++;
1272 }
1273
1274 return Qnil;
1275 }
1276
1277
1278 /* Same as Fdefine_charset_internal but arguments are more convenient
1279 to call from C (typically in syms_of_charset). This can define a
1280 charset of `offset' method only. Return the ID of the new
1281 charset. */
1282
1283 static int
1284 define_charset_internal (name, dimension, code_space, min_code, max_code,
1285 iso_final, iso_revision, emacs_mule_id,
1286 ascii_compatible, supplementary,
1287 code_offset)
1288 Lisp_Object name;
1289 int dimension;
1290 unsigned char *code_space;
1291 unsigned min_code, max_code;
1292 int iso_final, iso_revision, emacs_mule_id;
1293 int ascii_compatible, supplementary;
1294 int code_offset;
1295 {
1296 Lisp_Object args[charset_arg_max];
1297 Lisp_Object plist[14];
1298 Lisp_Object val;
1299 int i;
1300
1301 args[charset_arg_name] = name;
1302 args[charset_arg_dimension] = make_number (dimension);
1303 val = Fmake_vector (make_number (8), make_number (0));
1304 for (i = 0; i < 8; i++)
1305 ASET (val, i, make_number (code_space[i]));
1306 args[charset_arg_code_space] = val;
1307 args[charset_arg_min_code] = make_number (min_code);
1308 args[charset_arg_max_code] = make_number (max_code);
1309 args[charset_arg_iso_final]
1310 = (iso_final < 0 ? Qnil : make_number (iso_final));
1311 args[charset_arg_iso_revision] = make_number (iso_revision);
1312 args[charset_arg_emacs_mule_id]
1313 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1314 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1315 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1316 args[charset_arg_invalid_code] = Qnil;
1317 args[charset_arg_code_offset] = make_number (code_offset);
1318 args[charset_arg_map] = Qnil;
1319 args[charset_arg_subset] = Qnil;
1320 args[charset_arg_superset] = Qnil;
1321 args[charset_arg_unify_map] = Qnil;
1322
1323 plist[0] = intern (":name");
1324 plist[1] = args[charset_arg_name];
1325 plist[2] = intern (":dimension");
1326 plist[3] = args[charset_arg_dimension];
1327 plist[4] = intern (":code-space");
1328 plist[5] = args[charset_arg_code_space];
1329 plist[6] = intern (":iso-final-char");
1330 plist[7] = args[charset_arg_iso_final];
1331 plist[8] = intern (":emacs-mule-id");
1332 plist[9] = args[charset_arg_emacs_mule_id];
1333 plist[10] = intern (":ascii-compatible-p");
1334 plist[11] = args[charset_arg_ascii_compatible_p];
1335 plist[12] = intern (":code-offset");
1336 plist[13] = args[charset_arg_code_offset];
1337
1338 args[charset_arg_plist] = Flist (14, plist);
1339 Fdefine_charset_internal (charset_arg_max, args);
1340
1341 return XINT (CHARSET_SYMBOL_ID (name));
1342 }
1343
1344
1345 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1346 Sdefine_charset_alias, 2, 2, 0,
1347 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1348 (alias, charset)
1349 Lisp_Object alias, charset;
1350 {
1351 Lisp_Object attr;
1352
1353 CHECK_CHARSET_GET_ATTR (charset, attr);
1354 Fputhash (alias, attr, Vcharset_hash_table);
1355 Vcharset_list = Fcons (alias, Vcharset_list);
1356 return Qnil;
1357 }
1358
1359
1360 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1361 doc: /* Return the property list of CHARSET. */)
1362 (charset)
1363 Lisp_Object charset;
1364 {
1365 Lisp_Object attrs;
1366
1367 CHECK_CHARSET_GET_ATTR (charset, attrs);
1368 return CHARSET_ATTR_PLIST (attrs);
1369 }
1370
1371
1372 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1373 doc: /* Set CHARSET's property list to PLIST. */)
1374 (charset, plist)
1375 Lisp_Object charset, plist;
1376 {
1377 Lisp_Object attrs;
1378
1379 CHECK_CHARSET_GET_ATTR (charset, attrs);
1380 CHARSET_ATTR_PLIST (attrs) = plist;
1381 return plist;
1382 }
1383
1384
1385 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1386 doc: /* Unify characters of CHARSET with Unicode.
1387 This means reading the relevant file and installing the table defined
1388 by CHARSET's `:unify-map' property.
1389
1390 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1391 the same meaning as the `:unify-map' attribute in the function
1392 `define-charset' (which see).
1393
1394 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1395 (charset, unify_map, deunify)
1396 Lisp_Object charset, unify_map, deunify;
1397 {
1398 int id;
1399 struct charset *cs;
1400
1401 CHECK_CHARSET_GET_ID (charset, id);
1402 cs = CHARSET_FROM_ID (id);
1403 if (NILP (deunify)
1404 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1405 : ! CHARSET_UNIFIED_P (cs))
1406 return Qnil;
1407
1408 CHARSET_UNIFIED_P (cs) = 0;
1409 if (NILP (deunify))
1410 {
1411 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1412 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1413 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1414 if (NILP (unify_map))
1415 unify_map = CHARSET_UNIFY_MAP (cs);
1416 else
1417 {
1418 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1419 signal_error ("Bad unify-map", unify_map);
1420 CHARSET_UNIFY_MAP (cs) = unify_map;
1421 }
1422 if (NILP (Vchar_unify_table))
1423 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1424 char_table_set_range (Vchar_unify_table,
1425 cs->min_char, cs->max_char, charset);
1426 CHARSET_UNIFIED_P (cs) = 1;
1427 }
1428 else if (CHAR_TABLE_P (Vchar_unify_table))
1429 {
1430 int min_code = CHARSET_MIN_CODE (cs);
1431 int max_code = CHARSET_MAX_CODE (cs);
1432 int min_char = DECODE_CHAR (cs, min_code);
1433 int max_char = DECODE_CHAR (cs, max_code);
1434
1435 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1436 }
1437
1438 return Qnil;
1439 }
1440
1441 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1442 Sget_unused_iso_final_char, 2, 2, 0,
1443 doc: /*
1444 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1445 DIMENSION is the number of bytes to represent a character: 1 or 2.
1446 CHARS is the number of characters in a dimension: 94 or 96.
1447
1448 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1449 If there's no unused final char for the specified kind of charset,
1450 return nil. */)
1451 (dimension, chars)
1452 Lisp_Object dimension, chars;
1453 {
1454 int final_char;
1455
1456 CHECK_NUMBER (dimension);
1457 CHECK_NUMBER (chars);
1458 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1459 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1460 if (XINT (chars) != 94 && XINT (chars) != 96)
1461 args_out_of_range_3 (chars, make_number (94), make_number (96));
1462 for (final_char = '0'; final_char <= '?'; final_char++)
1463 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1464 break;
1465 return (final_char <= '?' ? make_number (final_char) : Qnil);
1466 }
1467
1468 static void
1469 check_iso_charset_parameter (dimension, chars, final_char)
1470 Lisp_Object dimension, chars, final_char;
1471 {
1472 CHECK_NATNUM (dimension);
1473 CHECK_NATNUM (chars);
1474 CHECK_NATNUM (final_char);
1475
1476 if (XINT (dimension) > 3)
1477 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1478 if (XINT (chars) != 94 && XINT (chars) != 96)
1479 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1480 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1481 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1482 }
1483
1484
1485 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1486 4, 4, 0,
1487 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1488
1489 On decoding by an ISO-2022 base coding system, when a charset
1490 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1491 if CHARSET is designated instead. */)
1492 (dimension, chars, final_char, charset)
1493 Lisp_Object dimension, chars, final_char, charset;
1494 {
1495 int id;
1496 int chars_flag;
1497
1498 CHECK_CHARSET_GET_ID (charset, id);
1499 check_iso_charset_parameter (dimension, chars, final_char);
1500 chars_flag = XINT (chars) == 96;
1501 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1502 return Qnil;
1503 }
1504
1505
1506 /* Return information about charsets in the text at PTR of NBYTES
1507 bytes, which are NCHARS characters. The value is:
1508
1509 0: Each character is represented by one byte. This is always
1510 true for a unibyte string. For a multibyte string, true if
1511 it contains only ASCII characters.
1512
1513 1: No charsets other than ascii, control-1, and latin-1 are
1514 found.
1515
1516 2: Otherwise.
1517 */
1518
1519 int
1520 string_xstring_p (string)
1521 Lisp_Object string;
1522 {
1523 const unsigned char *p = SDATA (string);
1524 const unsigned char *endp = p + SBYTES (string);
1525
1526 if (SCHARS (string) == SBYTES (string))
1527 return 0;
1528
1529 while (p < endp)
1530 {
1531 int c = STRING_CHAR_ADVANCE (p);
1532
1533 if (c >= 0x100)
1534 return 2;
1535 }
1536 return 1;
1537 }
1538
1539
1540 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1541
1542 CHARSETS is a vector. If Nth element is non-nil, it means the
1543 charset whose id is N is already found.
1544
1545 It may lookup a translation table TABLE if supplied. */
1546
1547 static void
1548 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1549 const unsigned char *ptr;
1550 EMACS_INT nchars, nbytes;
1551 Lisp_Object charsets, table;
1552 int multibyte;
1553 {
1554 const unsigned char *pend = ptr + nbytes;
1555
1556 if (nchars == nbytes)
1557 {
1558 if (multibyte)
1559 ASET (charsets, charset_ascii, Qt);
1560 else
1561 while (ptr < pend)
1562 {
1563 int c = *ptr++;
1564
1565 if (!NILP (table))
1566 c = translate_char (table, c);
1567 if (ASCII_BYTE_P (c))
1568 ASET (charsets, charset_ascii, Qt);
1569 else
1570 ASET (charsets, charset_eight_bit, Qt);
1571 }
1572 }
1573 else
1574 {
1575 while (ptr < pend)
1576 {
1577 int c = STRING_CHAR_ADVANCE (ptr);
1578 struct charset *charset;
1579
1580 if (!NILP (table))
1581 c = translate_char (table, c);
1582 charset = CHAR_CHARSET (c);
1583 ASET (charsets, CHARSET_ID (charset), Qt);
1584 }
1585 }
1586 }
1587
1588 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1589 2, 3, 0,
1590 doc: /* Return a list of charsets in the region between BEG and END.
1591 BEG and END are buffer positions.
1592 Optional arg TABLE if non-nil is a translation table to look up.
1593
1594 If the current buffer is unibyte, the returned list may contain
1595 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1596 (beg, end, table)
1597 Lisp_Object beg, end, table;
1598 {
1599 Lisp_Object charsets;
1600 EMACS_INT from, from_byte, to, stop, stop_byte;
1601 int i;
1602 Lisp_Object val;
1603 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1604
1605 validate_region (&beg, &end);
1606 from = XFASTINT (beg);
1607 stop = to = XFASTINT (end);
1608
1609 if (from < GPT && GPT < to)
1610 {
1611 stop = GPT;
1612 stop_byte = GPT_BYTE;
1613 }
1614 else
1615 stop_byte = CHAR_TO_BYTE (stop);
1616
1617 from_byte = CHAR_TO_BYTE (from);
1618
1619 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1620 while (1)
1621 {
1622 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1623 stop_byte - from_byte, charsets, table,
1624 multibyte);
1625 if (stop < to)
1626 {
1627 from = stop, from_byte = stop_byte;
1628 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1629 }
1630 else
1631 break;
1632 }
1633
1634 val = Qnil;
1635 for (i = charset_table_used - 1; i >= 0; i--)
1636 if (!NILP (AREF (charsets, i)))
1637 val = Fcons (CHARSET_NAME (charset_table + i), val);
1638 return val;
1639 }
1640
1641 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1642 1, 2, 0,
1643 doc: /* Return a list of charsets in STR.
1644 Optional arg TABLE if non-nil is a translation table to look up.
1645
1646 If STR is unibyte, the returned list may contain
1647 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1648 (str, table)
1649 Lisp_Object str, table;
1650 {
1651 Lisp_Object charsets;
1652 int i;
1653 Lisp_Object val;
1654
1655 CHECK_STRING (str);
1656
1657 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1658 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1659 charsets, table,
1660 STRING_MULTIBYTE (str));
1661 val = Qnil;
1662 for (i = charset_table_used - 1; i >= 0; i--)
1663 if (!NILP (AREF (charsets, i)))
1664 val = Fcons (CHARSET_NAME (charset_table + i), val);
1665 return val;
1666 }
1667
1668 \f
1669
1670 /* Return a unified character code for C (>= 0x110000). VAL is a
1671 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1672 charset symbol. */
1673 int
1674 maybe_unify_char (c, val)
1675 int c;
1676 Lisp_Object val;
1677 {
1678 struct charset *charset;
1679
1680 if (INTEGERP (val))
1681 return XINT (val);
1682 if (NILP (val))
1683 return c;
1684
1685 CHECK_CHARSET_GET_CHARSET (val, charset);
1686 load_charset (charset, 1);
1687 if (! inhibit_load_charset_map)
1688 {
1689 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1690 if (! NILP (val))
1691 c = XINT (val);
1692 }
1693 else
1694 {
1695 int code_index = c - CHARSET_CODE_OFFSET (charset);
1696 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1697
1698 if (unified > 0)
1699 c = unified;
1700 }
1701 return c;
1702 }
1703
1704
1705 /* Return a character correponding to the code-point CODE of
1706 CHARSET. */
1707
1708 int
1709 decode_char (charset, code)
1710 struct charset *charset;
1711 unsigned code;
1712 {
1713 int c, char_index;
1714 enum charset_method method = CHARSET_METHOD (charset);
1715
1716 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1717 return -1;
1718
1719 if (method == CHARSET_METHOD_SUBSET)
1720 {
1721 Lisp_Object subset_info;
1722
1723 subset_info = CHARSET_SUBSET (charset);
1724 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1725 code -= XINT (AREF (subset_info, 3));
1726 if (code < XFASTINT (AREF (subset_info, 1))
1727 || code > XFASTINT (AREF (subset_info, 2)))
1728 c = -1;
1729 else
1730 c = DECODE_CHAR (charset, code);
1731 }
1732 else if (method == CHARSET_METHOD_SUPERSET)
1733 {
1734 Lisp_Object parents;
1735
1736 parents = CHARSET_SUPERSET (charset);
1737 c = -1;
1738 for (; CONSP (parents); parents = XCDR (parents))
1739 {
1740 int id = XINT (XCAR (XCAR (parents)));
1741 int code_offset = XINT (XCDR (XCAR (parents)));
1742 unsigned this_code = code - code_offset;
1743
1744 charset = CHARSET_FROM_ID (id);
1745 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1746 break;
1747 }
1748 }
1749 else
1750 {
1751 char_index = CODE_POINT_TO_INDEX (charset, code);
1752 if (char_index < 0)
1753 return -1;
1754
1755 if (method == CHARSET_METHOD_MAP)
1756 {
1757 Lisp_Object decoder;
1758
1759 decoder = CHARSET_DECODER (charset);
1760 if (! VECTORP (decoder))
1761 {
1762 load_charset (charset, 1);
1763 decoder = CHARSET_DECODER (charset);
1764 }
1765 if (VECTORP (decoder))
1766 c = XINT (AREF (decoder, char_index));
1767 else
1768 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1769 }
1770 else /* method == CHARSET_METHOD_OFFSET */
1771 {
1772 c = char_index + CHARSET_CODE_OFFSET (charset);
1773 if (CHARSET_UNIFIED_P (charset)
1774 && c > MAX_UNICODE_CHAR)
1775 MAYBE_UNIFY_CHAR (c);
1776 }
1777 }
1778
1779 return c;
1780 }
1781
1782 /* Variable used temporarily by the macro ENCODE_CHAR. */
1783 Lisp_Object charset_work;
1784
1785 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1786 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1787 use CHARSET's strict_max_char instead of max_char. */
1788
1789 unsigned
1790 encode_char (charset, c)
1791 struct charset *charset;
1792 int c;
1793 {
1794 unsigned code;
1795 enum charset_method method = CHARSET_METHOD (charset);
1796
1797 if (CHARSET_UNIFIED_P (charset))
1798 {
1799 Lisp_Object deunifier, deunified;
1800 int code_index = -1;
1801
1802 deunifier = CHARSET_DEUNIFIER (charset);
1803 if (! CHAR_TABLE_P (deunifier))
1804 {
1805 load_charset (charset, 2);
1806 deunifier = CHARSET_DEUNIFIER (charset);
1807 }
1808 if (CHAR_TABLE_P (deunifier))
1809 {
1810 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1811
1812 if (INTEGERP (deunified))
1813 code_index = XINT (deunified);
1814 }
1815 else
1816 {
1817 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1818 }
1819 if (code_index >= 0)
1820 c = CHARSET_CODE_OFFSET (charset) + code_index;
1821 }
1822
1823 if (method == CHARSET_METHOD_SUBSET)
1824 {
1825 Lisp_Object subset_info;
1826 struct charset *this_charset;
1827
1828 subset_info = CHARSET_SUBSET (charset);
1829 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1830 code = ENCODE_CHAR (this_charset, c);
1831 if (code == CHARSET_INVALID_CODE (this_charset)
1832 || code < XFASTINT (AREF (subset_info, 1))
1833 || code > XFASTINT (AREF (subset_info, 2)))
1834 return CHARSET_INVALID_CODE (charset);
1835 code += XINT (AREF (subset_info, 3));
1836 return code;
1837 }
1838
1839 if (method == CHARSET_METHOD_SUPERSET)
1840 {
1841 Lisp_Object parents;
1842
1843 parents = CHARSET_SUPERSET (charset);
1844 for (; CONSP (parents); parents = XCDR (parents))
1845 {
1846 int id = XINT (XCAR (XCAR (parents)));
1847 int code_offset = XINT (XCDR (XCAR (parents)));
1848 struct charset *this_charset = CHARSET_FROM_ID (id);
1849
1850 code = ENCODE_CHAR (this_charset, c);
1851 if (code != CHARSET_INVALID_CODE (this_charset))
1852 return code + code_offset;
1853 }
1854 return CHARSET_INVALID_CODE (charset);
1855 }
1856
1857 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1858 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1859 return CHARSET_INVALID_CODE (charset);
1860
1861 if (method == CHARSET_METHOD_MAP)
1862 {
1863 Lisp_Object encoder;
1864 Lisp_Object val;
1865
1866 encoder = CHARSET_ENCODER (charset);
1867 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1868 {
1869 load_charset (charset, 2);
1870 encoder = CHARSET_ENCODER (charset);
1871 }
1872 if (CHAR_TABLE_P (encoder))
1873 {
1874 val = CHAR_TABLE_REF (encoder, c);
1875 if (NILP (val))
1876 return CHARSET_INVALID_CODE (charset);
1877 code = XINT (val);
1878 if (! CHARSET_COMPACT_CODES_P (charset))
1879 code = INDEX_TO_CODE_POINT (charset, code);
1880 }
1881 else
1882 {
1883 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1884 code = INDEX_TO_CODE_POINT (charset, code);
1885 }
1886 }
1887 else /* method == CHARSET_METHOD_OFFSET */
1888 {
1889 int code_index = c - CHARSET_CODE_OFFSET (charset);
1890
1891 code = INDEX_TO_CODE_POINT (charset, code_index);
1892 }
1893
1894 return code;
1895 }
1896
1897
1898 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1899 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1900 Return nil if CODE-POINT is not valid in CHARSET.
1901
1902 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1903
1904 Optional argument RESTRICTION specifies a way to map the pair of CCS
1905 and CODE-POINT to a character. Currently not supported and just ignored. */)
1906 (charset, code_point, restriction)
1907 Lisp_Object charset, code_point, restriction;
1908 {
1909 int c, id;
1910 unsigned code;
1911 struct charset *charsetp;
1912
1913 CHECK_CHARSET_GET_ID (charset, id);
1914 if (CONSP (code_point))
1915 {
1916 CHECK_NATNUM_CAR (code_point);
1917 CHECK_NATNUM_CDR (code_point);
1918 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1919 }
1920 else
1921 {
1922 CHECK_NATNUM (code_point);
1923 code = XINT (code_point);
1924 }
1925 charsetp = CHARSET_FROM_ID (id);
1926 c = DECODE_CHAR (charsetp, code);
1927 return (c >= 0 ? make_number (c) : Qnil);
1928 }
1929
1930
1931 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1932 doc: /* Encode the character CH into a code-point of CHARSET.
1933 Return nil if CHARSET doesn't include CH.
1934
1935 Optional argument RESTRICTION specifies a way to map CH to a
1936 code-point in CCS. Currently not supported and just ignored. */)
1937 (ch, charset, restriction)
1938 Lisp_Object ch, charset, restriction;
1939 {
1940 int id;
1941 unsigned code;
1942 struct charset *charsetp;
1943
1944 CHECK_CHARSET_GET_ID (charset, id);
1945 CHECK_NATNUM (ch);
1946 charsetp = CHARSET_FROM_ID (id);
1947 code = ENCODE_CHAR (charsetp, XINT (ch));
1948 if (code == CHARSET_INVALID_CODE (charsetp))
1949 return Qnil;
1950 if (code > 0x7FFFFFF)
1951 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1952 return make_number (code);
1953 }
1954
1955
1956 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1957 doc:
1958 /* Return a character of CHARSET whose position codes are CODEn.
1959
1960 CODE1 through CODE4 are optional, but if you don't supply sufficient
1961 position codes, it is assumed that the minimum code in each dimension
1962 is specified. */)
1963 (charset, code1, code2, code3, code4)
1964 Lisp_Object charset, code1, code2, code3, code4;
1965 {
1966 int id, dimension;
1967 struct charset *charsetp;
1968 unsigned code;
1969 int c;
1970
1971 CHECK_CHARSET_GET_ID (charset, id);
1972 charsetp = CHARSET_FROM_ID (id);
1973
1974 dimension = CHARSET_DIMENSION (charsetp);
1975 if (NILP (code1))
1976 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1977 ? 0 : CHARSET_MIN_CODE (charsetp));
1978 else
1979 {
1980 CHECK_NATNUM (code1);
1981 if (XFASTINT (code1) >= 0x100)
1982 args_out_of_range (make_number (0xFF), code1);
1983 code = XFASTINT (code1);
1984
1985 if (dimension > 1)
1986 {
1987 code <<= 8;
1988 if (NILP (code2))
1989 code |= charsetp->code_space[(dimension - 2) * 4];
1990 else
1991 {
1992 CHECK_NATNUM (code2);
1993 if (XFASTINT (code2) >= 0x100)
1994 args_out_of_range (make_number (0xFF), code2);
1995 code |= XFASTINT (code2);
1996 }
1997
1998 if (dimension > 2)
1999 {
2000 code <<= 8;
2001 if (NILP (code3))
2002 code |= charsetp->code_space[(dimension - 3) * 4];
2003 else
2004 {
2005 CHECK_NATNUM (code3);
2006 if (XFASTINT (code3) >= 0x100)
2007 args_out_of_range (make_number (0xFF), code3);
2008 code |= XFASTINT (code3);
2009 }
2010
2011 if (dimension > 3)
2012 {
2013 code <<= 8;
2014 if (NILP (code4))
2015 code |= charsetp->code_space[0];
2016 else
2017 {
2018 CHECK_NATNUM (code4);
2019 if (XFASTINT (code4) >= 0x100)
2020 args_out_of_range (make_number (0xFF), code4);
2021 code |= XFASTINT (code4);
2022 }
2023 }
2024 }
2025 }
2026 }
2027
2028 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2029 code &= 0x7F7F7F7F;
2030 c = DECODE_CHAR (charsetp, code);
2031 if (c < 0)
2032 error ("Invalid code(s)");
2033 return make_number (c);
2034 }
2035
2036
2037 /* Return the first charset in CHARSET_LIST that contains C.
2038 CHARSET_LIST is a list of charset IDs. If it is nil, use
2039 Vcharset_ordered_list. */
2040
2041 struct charset *
2042 char_charset (c, charset_list, code_return)
2043 int c;
2044 Lisp_Object charset_list;
2045 unsigned *code_return;
2046 {
2047 int maybe_null = 0;
2048
2049 if (NILP (charset_list))
2050 charset_list = Vcharset_ordered_list;
2051 else
2052 maybe_null = 1;
2053
2054 while (CONSP (charset_list))
2055 {
2056 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2057 unsigned code = ENCODE_CHAR (charset, c);
2058
2059 if (code != CHARSET_INVALID_CODE (charset))
2060 {
2061 if (code_return)
2062 *code_return = code;
2063 return charset;
2064 }
2065 charset_list = XCDR (charset_list);
2066 if (c <= MAX_UNICODE_CHAR
2067 && EQ (charset_list, Vcharset_non_preferred_head))
2068 return CHARSET_FROM_ID (charset_unicode);
2069 }
2070 return (maybe_null ? NULL
2071 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2072 : CHARSET_FROM_ID (charset_eight_bit));
2073 }
2074
2075
2076 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2077 doc:
2078 /*Return list of charset and one to four position-codes of CH.
2079 The charset is decided by the current priority order of charsets.
2080 A position-code is a byte value of each dimension of the code-point of
2081 CH in the charset. */)
2082 (ch)
2083 Lisp_Object ch;
2084 {
2085 struct charset *charset;
2086 int c, dimension;
2087 unsigned code;
2088 Lisp_Object val;
2089
2090 CHECK_CHARACTER (ch);
2091 c = XFASTINT (ch);
2092 charset = CHAR_CHARSET (c);
2093 if (! charset)
2094 abort ();
2095 code = ENCODE_CHAR (charset, c);
2096 if (code == CHARSET_INVALID_CODE (charset))
2097 abort ();
2098 dimension = CHARSET_DIMENSION (charset);
2099 for (val = Qnil; dimension > 0; dimension--)
2100 {
2101 val = Fcons (make_number (code & 0xFF), val);
2102 code >>= 8;
2103 }
2104 return Fcons (CHARSET_NAME (charset), val);
2105 }
2106
2107
2108 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2109 doc: /* Return the charset of highest priority that contains CH.
2110 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2111 from which to find the charset. It may also be a coding system. In
2112 that case, find the charset from what supported by that coding system. */)
2113 (ch, restriction)
2114 Lisp_Object ch, restriction;
2115 {
2116 struct charset *charset;
2117
2118 CHECK_CHARACTER (ch);
2119 if (NILP (restriction))
2120 charset = CHAR_CHARSET (XINT (ch));
2121 else
2122 {
2123 Lisp_Object charset_list;
2124
2125 if (CONSP (restriction))
2126 {
2127 for (charset_list = Qnil; CONSP (restriction);
2128 restriction = XCDR (restriction))
2129 {
2130 int id;
2131
2132 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2133 charset_list = Fcons (make_number (id), charset_list);
2134 }
2135 charset_list = Fnreverse (charset_list);
2136 }
2137 else
2138 charset_list = coding_system_charset_list (restriction);
2139 charset = char_charset (XINT (ch), charset_list, NULL);
2140 if (! charset)
2141 return Qnil;
2142 }
2143 return (CHARSET_NAME (charset));
2144 }
2145
2146
2147 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2148 doc: /*
2149 Return charset of a character in the current buffer at position POS.
2150 If POS is nil, it defauls to the current point.
2151 If POS is out of range, the value is nil. */)
2152 (pos)
2153 Lisp_Object pos;
2154 {
2155 Lisp_Object ch;
2156 struct charset *charset;
2157
2158 ch = Fchar_after (pos);
2159 if (! INTEGERP (ch))
2160 return ch;
2161 charset = CHAR_CHARSET (XINT (ch));
2162 return (CHARSET_NAME (charset));
2163 }
2164
2165
2166 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2167 doc: /*
2168 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2169
2170 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2171 by their DIMENSION, CHARS, and FINAL-CHAR,
2172 whereas Emacs distinguishes them by charset symbol.
2173 See the documentation of the function `charset-info' for the meanings of
2174 DIMENSION, CHARS, and FINAL-CHAR. */)
2175 (dimension, chars, final_char)
2176 Lisp_Object dimension, chars, final_char;
2177 {
2178 int id;
2179 int chars_flag;
2180
2181 check_iso_charset_parameter (dimension, chars, final_char);
2182 chars_flag = XFASTINT (chars) == 96;
2183 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2184 XFASTINT (final_char));
2185 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2186 }
2187
2188
2189 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2190 0, 0, 0,
2191 doc: /*
2192 Internal use only.
2193 Clear temporary charset mapping tables.
2194 It should be called only from temacs invoked for dumping. */)
2195 ()
2196 {
2197 int i;
2198 struct charset *charset;
2199 Lisp_Object attrs;
2200
2201 if (temp_charset_work)
2202 {
2203 free (temp_charset_work);
2204 temp_charset_work = NULL;
2205 }
2206
2207 if (CHAR_TABLE_P (Vchar_unify_table))
2208 Foptimize_char_table (Vchar_unify_table, Qnil);
2209
2210 return Qnil;
2211 }
2212
2213 DEFUN ("charset-priority-list", Fcharset_priority_list,
2214 Scharset_priority_list, 0, 1, 0,
2215 doc: /* Return the list of charsets ordered by priority.
2216 HIGHESTP non-nil means just return the highest priority one. */)
2217 (highestp)
2218 Lisp_Object highestp;
2219 {
2220 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2221
2222 if (!NILP (highestp))
2223 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2224
2225 while (!NILP (list))
2226 {
2227 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2228 list = XCDR (list);
2229 }
2230 return Fnreverse (val);
2231 }
2232
2233 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2234 1, MANY, 0,
2235 doc: /* Assign higher priority to the charsets given as arguments.
2236 usage: (set-charset-priority &rest charsets) */)
2237 (nargs, args)
2238 int nargs;
2239 Lisp_Object *args;
2240 {
2241 Lisp_Object new_head, old_list, arglist[2];
2242 Lisp_Object list_2022, list_emacs_mule;
2243 int i, id;
2244
2245 old_list = Fcopy_sequence (Vcharset_ordered_list);
2246 new_head = Qnil;
2247 for (i = 0; i < nargs; i++)
2248 {
2249 CHECK_CHARSET_GET_ID (args[i], id);
2250 if (! NILP (Fmemq (make_number (id), old_list)))
2251 {
2252 old_list = Fdelq (make_number (id), old_list);
2253 new_head = Fcons (make_number (id), new_head);
2254 }
2255 }
2256 arglist[0] = Fnreverse (new_head);
2257 arglist[1] = Vcharset_non_preferred_head = old_list;
2258 Vcharset_ordered_list = Fnconc (2, arglist);
2259 charset_ordered_list_tick++;
2260
2261 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2262 CONSP (old_list); old_list = XCDR (old_list))
2263 {
2264 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2265 list_2022 = Fcons (XCAR (old_list), list_2022);
2266 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2267 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2268 }
2269 Viso_2022_charset_list = Fnreverse (list_2022);
2270 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2271
2272 return Qnil;
2273 }
2274
2275 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2276 0, 1, 0,
2277 doc: /* Internal use only.
2278 Return charset identification number of CHARSET. */)
2279 (charset)
2280 Lisp_Object charset;
2281 {
2282 int id;
2283
2284 CHECK_CHARSET_GET_ID (charset, id);
2285 return make_number (id);
2286 }
2287
2288 \f
2289 void
2290 init_charset ()
2291 {
2292 Lisp_Object tempdir;
2293 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2294 if (access (SDATA (tempdir), 0) < 0)
2295 {
2296 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2297 Emacs will not function correctly without the character map files.\n\
2298 Please check your installation!\n",
2299 tempdir);
2300 /* TODO should this be a fatal error? (Bug#909) */
2301 }
2302
2303 Vcharset_map_path = Fcons (tempdir, Qnil);
2304 }
2305
2306
2307 void
2308 init_charset_once ()
2309 {
2310 int i, j, k;
2311
2312 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2313 for (j = 0; j < ISO_MAX_CHARS; j++)
2314 for (k = 0; k < ISO_MAX_FINAL; k++)
2315 iso_charset_table[i][j][k] = -1;
2316
2317 for (i = 0; i < 256; i++)
2318 emacs_mule_charset[i] = NULL;
2319
2320 charset_jisx0201_roman = -1;
2321 charset_jisx0208_1978 = -1;
2322 charset_jisx0208 = -1;
2323
2324 for (i = 0; i < 128; i++)
2325 unibyte_to_multibyte_table[i] = i;
2326 for (; i < 256; i++)
2327 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2328 }
2329
2330 #ifdef emacs
2331
2332 void
2333 syms_of_charset ()
2334 {
2335 DEFSYM (Qcharsetp, "charsetp");
2336
2337 DEFSYM (Qascii, "ascii");
2338 DEFSYM (Qunicode, "unicode");
2339 DEFSYM (Qemacs, "emacs");
2340 DEFSYM (Qeight_bit, "eight-bit");
2341 DEFSYM (Qiso_8859_1, "iso-8859-1");
2342
2343 DEFSYM (Qgl, "gl");
2344 DEFSYM (Qgr, "gr");
2345
2346 staticpro (&Vcharset_ordered_list);
2347 Vcharset_ordered_list = Qnil;
2348
2349 staticpro (&Viso_2022_charset_list);
2350 Viso_2022_charset_list = Qnil;
2351
2352 staticpro (&Vemacs_mule_charset_list);
2353 Vemacs_mule_charset_list = Qnil;
2354
2355 /* Don't staticpro them here. It's done in syms_of_fns. */
2356 QCtest = intern (":test");
2357 Qeq = intern ("eq");
2358
2359 staticpro (&Vcharset_hash_table);
2360 {
2361 Lisp_Object args[2];
2362 args[0] = QCtest;
2363 args[1] = Qeq;
2364 Vcharset_hash_table = Fmake_hash_table (2, args);
2365 }
2366
2367 charset_table_size = 128;
2368 charset_table = ((struct charset *)
2369 xmalloc (sizeof (struct charset) * charset_table_size));
2370 charset_table_used = 0;
2371
2372 defsubr (&Scharsetp);
2373 defsubr (&Smap_charset_chars);
2374 defsubr (&Sdefine_charset_internal);
2375 defsubr (&Sdefine_charset_alias);
2376 defsubr (&Scharset_plist);
2377 defsubr (&Sset_charset_plist);
2378 defsubr (&Sunify_charset);
2379 defsubr (&Sget_unused_iso_final_char);
2380 defsubr (&Sdeclare_equiv_charset);
2381 defsubr (&Sfind_charset_region);
2382 defsubr (&Sfind_charset_string);
2383 defsubr (&Sdecode_char);
2384 defsubr (&Sencode_char);
2385 defsubr (&Ssplit_char);
2386 defsubr (&Smake_char);
2387 defsubr (&Schar_charset);
2388 defsubr (&Scharset_after);
2389 defsubr (&Siso_charset);
2390 defsubr (&Sclear_charset_maps);
2391 defsubr (&Scharset_priority_list);
2392 defsubr (&Sset_charset_priority);
2393 defsubr (&Scharset_id_internal);
2394
2395 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2396 doc: /* *List of directories to search for charset map files. */);
2397 Vcharset_map_path = Qnil;
2398
2399 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2400 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2401 inhibit_load_charset_map = 0;
2402
2403 DEFVAR_LISP ("charset-list", &Vcharset_list,
2404 doc: /* List of all charsets ever defined. */);
2405 Vcharset_list = Qnil;
2406
2407 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2408 doc: /* ISO639 language mnemonic symbol for the current language environment.
2409 If the current language environment is for multiple languages (e.g. "Latin-1"),
2410 the value may be a list of mnemonics. */);
2411 Vcurrent_iso639_language = Qnil;
2412
2413 charset_ascii
2414 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2415 0, 127, 'B', -1, 0, 1, 0, 0);
2416 charset_iso_8859_1
2417 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2418 0, 255, -1, -1, -1, 1, 0, 0);
2419 charset_unicode
2420 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2421 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2422 charset_emacs
2423 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2424 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2425 charset_eight_bit
2426 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2427 128, 255, -1, 0, -1, 0, 1,
2428 MAX_5_BYTE_CHAR + 1);
2429 }
2430
2431 #endif /* emacs */
2432
2433 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2434 (do not change this comment) */