Follow Glenn's lead and update format of Copyright.
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
78
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
85
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
103
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
108
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
111
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
114
115 struct charset *emacs_mule_charset[256];
116
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121 Lisp_Object Vcharset_map_path;
122
123 /* If nonzero, don't load charset maps. */
124 int inhibit_load_charset_map;
125
126 Lisp_Object Vcurrent_iso639_language;
127
128 /* Defined in chartab.c */
129 extern void
130 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
131 Lisp_Object function, Lisp_Object table,
132 Lisp_Object arg, struct charset *charset,
133 unsigned from, unsigned to));
134
135 #define CODE_POINT_TO_INDEX(charset, code) \
136 ((charset)->code_linear_p \
137 ? (code) - (charset)->min_code \
138 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
139 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
140 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
141 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
142 ? (((((code) >> 24) - (charset)->code_space[12]) \
143 * (charset)->code_space[11]) \
144 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
145 * (charset)->code_space[7]) \
146 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
147 * (charset)->code_space[3]) \
148 + (((code) & 0xFF) - (charset)->code_space[0]) \
149 - ((charset)->char_index_offset)) \
150 : -1)
151
152
153 /* Convert the character index IDX to code-point CODE for CHARSET.
154 It is assumed that IDX is in a valid range. */
155
156 #define INDEX_TO_CODE_POINT(charset, idx) \
157 ((charset)->code_linear_p \
158 ? (idx) + (charset)->min_code \
159 : (idx += (charset)->char_index_offset, \
160 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
161 | (((charset)->code_space[4] \
162 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
163 << 8) \
164 | (((charset)->code_space[8] \
165 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
166 << 16) \
167 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
168 << 24))))
169
170 /* Structure to hold mapping tables for a charset. Used by temacs
171 invoked for dumping. */
172
173 static struct
174 {
175 /* The current charset for which the following tables are setup. */
176 struct charset *current;
177
178 /* 1 iff the following table is used for encoder. */
179 short for_encoder;
180
181 /* When the following table is used for encoding, mininum and
182 maxinum character of the current charset. */
183 int min_char, max_char;
184
185 /* A Unicode character correspoinding to the code indice 0 (i.e. the
186 minimum code-point) of the current charset, or -1 if the code
187 indice 0 is not a Unicode character. This is checked when
188 table.encoder[CHAR] is zero. */
189 int zero_index_char;
190
191 union {
192 /* Table mapping code-indices (not code-points) of the current
193 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
194 doesn't belong to the current charset. */
195 int decoder[0x10000];
196 /* Table mapping Unicode characters to code-indices of the current
197 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
198 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
199 (0x20000..0x2FFFF). Note that there is no charset map that
200 uses both SMP and SIP. */
201 unsigned short encoder[0x20000];
202 } table;
203 } *temp_charset_work;
204
205 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
206 do { \
207 if ((CODE) == 0) \
208 temp_charset_work->zero_index_char = (C); \
209 else if ((C) < 0x20000) \
210 temp_charset_work->table.encoder[(C)] = (CODE); \
211 else \
212 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
213 } while (0)
214
215 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
216 ((C) == temp_charset_work->zero_index_char ? 0 \
217 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
218 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
219 : temp_charset_work->table.encoder[(C) - 0x10000] \
220 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
221
222 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
223 (temp_charset_work->table.decoder[(CODE)] = (C))
224
225 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
226 (temp_charset_work->table.decoder[(CODE)])
227 \f
228
229 /* Set to 1 to warn that a charset map is loaded and thus a buffer
230 text and a string data may be relocated. */
231 int charset_map_loaded;
232
233 struct charset_map_entries
234 {
235 struct {
236 unsigned from, to;
237 int c;
238 } entry[0x10000];
239 struct charset_map_entries *next;
240 };
241
242 /* Load the mapping information of CHARSET from ENTRIES for
243 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
244 encoding (CONTROL_FLAG == 2).
245
246 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
247 and CHARSET->fast_map.
248
249 If CONTROL_FLAG is 1, setup the following tables according to
250 CHARSET->method and inhibit_load_charset_map.
251
252 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
253 ----------------------+--------------------+---------------------------
254 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
257
258 If CONTROL_FLAG is 2, setup the following tables.
259
260 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
261 ----------------------+--------------------+---------------------------
262 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
263 ----------------------+--------------------+--------------------------
264 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
265 */
266
267 static void
268 load_charset_map (charset, entries, n_entries, control_flag)
269 struct charset *charset;
270 struct charset_map_entries *entries;
271 int n_entries;
272 int control_flag;
273 {
274 Lisp_Object vec, table;
275 unsigned max_code = CHARSET_MAX_CODE (charset);
276 int ascii_compatible_p = charset->ascii_compatible_p;
277 int min_char, max_char, nonascii_min_char;
278 int i;
279 unsigned char *fast_map = charset->fast_map;
280
281 if (n_entries <= 0)
282 return;
283
284 if (control_flag)
285 {
286 if (! inhibit_load_charset_map)
287 {
288 if (control_flag == 1)
289 {
290 if (charset->method == CHARSET_METHOD_MAP)
291 {
292 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
293
294 vec = CHARSET_DECODER (charset)
295 = Fmake_vector (make_number (n), make_number (-1));
296 }
297 else
298 {
299 char_table_set_range (Vchar_unify_table,
300 charset->min_char, charset->max_char,
301 Qnil);
302 }
303 }
304 else
305 {
306 table = Fmake_char_table (Qnil, Qnil);
307 if (charset->method == CHARSET_METHOD_MAP)
308 CHARSET_ENCODER (charset) = table;
309 else
310 CHARSET_DEUNIFIER (charset) = table;
311 }
312 }
313 else
314 {
315 if (! temp_charset_work)
316 temp_charset_work = malloc (sizeof (*temp_charset_work));
317 if (control_flag == 1)
318 {
319 memset (temp_charset_work->table.decoder, -1,
320 sizeof (int) * 0x10000);
321 temp_charset_work->for_encoder = 0;
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
333 charset_map_loaded = 1;
334 }
335
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
339 {
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
344
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
352 {
353 to_index = from_index;
354 to_c = from_c;
355 }
356 else
357 {
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
360 }
361 if (from_index < 0 || to_index < 0)
362 continue;
363
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
368
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
418
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
421 }
422 }
423
424 if (control_flag == 0)
425 {
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
429 }
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
435 }
436
437
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
440
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
445 {
446 int c;
447 unsigned n;
448
449 while ((c = getc (fp)) != EOF)
450 {
451 if (c == '#')
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
460 }
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
478 }
479
480
481 /* Return a mapping vector for CHARSET loaded from MAPFILE.
482 Each line of MAPFILE has this form
483 0xAAAA 0xCCCC
484 where 0xAAAA is a code-point and 0xCCCC is the corresponding
485 character code, or this form
486 0xAAAA-0xBBBB 0xCCCC
487 where 0xAAAA and 0xBBBB are code-points specifying a range, and
488 0xCCCC is the first character code of the range.
489
490 The returned vector has this form:
491 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
492 where CODE1 is a code-point or a cons of code-points specifying a
493 range. */
494
495 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
496
497 static void
498 load_charset_map_from_file (charset, mapfile, control_flag)
499 struct charset *charset;
500 Lisp_Object mapfile;
501 int control_flag;
502 {
503 unsigned min_code = CHARSET_MIN_CODE (charset);
504 unsigned max_code = CHARSET_MAX_CODE (charset);
505 int fd;
506 FILE *fp;
507 int eof;
508 Lisp_Object suffixes;
509 struct charset_map_entries *head, *entries;
510 int n_entries;
511
512 suffixes = Fcons (build_string (".map"),
513 Fcons (build_string (".TXT"), Qnil));
514
515 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 {
519 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
520 return;
521 }
522
523 head = entries = ((struct charset_map_entries *)
524 alloca (sizeof (struct charset_map_entries)));
525 n_entries = 0;
526 eof = 0;
527 while (1)
528 {
529 unsigned from, to;
530 int c;
531 int idx;
532
533 from = read_hex (fp, &eof);
534 if (eof)
535 break;
536 if (getc (fp) == '-')
537 to = read_hex (fp, &eof);
538 else
539 to = from;
540 c = (int) read_hex (fp, &eof);
541
542 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
543 continue;
544
545 if (n_entries > 0 && (n_entries % 0x10000) == 0)
546 {
547 entries->next = ((struct charset_map_entries *)
548 alloca (sizeof (struct charset_map_entries)));
549 entries = entries->next;
550 }
551 idx = n_entries % 0x10000;
552 entries->entry[idx].from = from;
553 entries->entry[idx].to = to;
554 entries->entry[idx].c = c;
555 n_entries++;
556 }
557 fclose (fp);
558 close (fd);
559
560 load_charset_map (charset, head, n_entries, control_flag);
561 }
562
563 static void
564 load_charset_map_from_vector (charset, vec, control_flag)
565 struct charset *charset;
566 Lisp_Object vec;
567 int control_flag;
568 {
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
575
576 if (len % 2 == 1)
577 {
578 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
579 return;
580 }
581
582 head = entries = ((struct charset_map_entries *)
583 alloca (sizeof (struct charset_map_entries)));
584 n_entries = 0;
585 for (i = 0; i < len; i += 2)
586 {
587 Lisp_Object val, val2;
588 unsigned from, to;
589 int c;
590 int idx;
591
592 val = AREF (vec, i);
593 if (CONSP (val))
594 {
595 val2 = XCDR (val);
596 val = XCAR (val);
597 CHECK_NATNUM (val);
598 CHECK_NATNUM (val2);
599 from = XFASTINT (val);
600 to = XFASTINT (val2);
601 }
602 else
603 {
604 CHECK_NATNUM (val);
605 from = to = XFASTINT (val);
606 }
607 val = AREF (vec, i + 1);
608 CHECK_NATNUM (val);
609 c = XFASTINT (val);
610
611 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
612 continue;
613
614 if (n_entries > 0 && (n_entries % 0x10000) == 0)
615 {
616 entries->next = ((struct charset_map_entries *)
617 alloca (sizeof (struct charset_map_entries)));
618 entries = entries->next;
619 }
620 idx = n_entries % 0x10000;
621 entries->entry[idx].from = from;
622 entries->entry[idx].to = to;
623 entries->entry[idx].c = c;
624 n_entries++;
625 }
626
627 load_charset_map (charset, head, n_entries, control_flag);
628 }
629
630
631 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
632 map it is (see the comment of load_charset_map for the detail). */
633
634 static void
635 load_charset (charset, control_flag)
636 struct charset *charset;
637 int control_flag;
638 {
639 Lisp_Object map;
640
641 if (inhibit_load_charset_map
642 && temp_charset_work
643 && charset == temp_charset_work->current
644 && (control_flag == 2 == temp_charset_work->for_encoder))
645 return;
646
647 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
648 map = CHARSET_MAP (charset);
649 else if (CHARSET_UNIFIED_P (charset))
650 map = CHARSET_UNIFY_MAP (charset);
651 if (STRINGP (map))
652 load_charset_map_from_file (charset, map, control_flag);
653 else
654 load_charset_map_from_vector (charset, map, control_flag);
655 }
656
657
658 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
659 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
660 (object)
661 Lisp_Object object;
662 {
663 return (CHARSETP (object) ? Qt : Qnil);
664 }
665
666
667 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
668 Lisp_Object function, Lisp_Object arg,
669 unsigned from, unsigned to));
670
671 void
672 map_charset_for_dump (c_function, function, arg, from, to)
673 void (*c_function) (Lisp_Object, Lisp_Object);
674 Lisp_Object function, arg;
675 unsigned from, to;
676 {
677 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
678 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
679 Lisp_Object range;
680 int c, stop;
681 struct gcpro gcpro1;
682
683 range = Fcons (Qnil, Qnil);
684 GCPRO1 (range);
685
686 c = temp_charset_work->min_char;
687 stop = (temp_charset_work->max_char < 0x20000
688 ? temp_charset_work->max_char : 0xFFFF);
689
690 while (1)
691 {
692 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
693
694 if (index >= from_idx && index <= to_idx)
695 {
696 if (NILP (XCAR (range)))
697 XSETCAR (range, make_number (c));
698 }
699 else if (! NILP (XCAR (range)))
700 {
701 XSETCDR (range, make_number (c - 1));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 XSETCAR (range, Qnil);
707 }
708 if (c == stop)
709 {
710 if (c == temp_charset_work->max_char)
711 {
712 if (! NILP (XCAR (range)))
713 {
714 XSETCDR (range, make_number (c));
715 if (c_function)
716 (*c_function) (arg, range);
717 else
718 call2 (function, range, arg);
719 }
720 break;
721 }
722 c = 0x1FFFF;
723 stop = temp_charset_work->max_char;
724 }
725 c++;
726 }
727 }
728
729 void
730 map_charset_chars (c_function, function, arg,
731 charset, from, to)
732 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
733 Lisp_Object function, arg;
734 struct charset *charset;
735 unsigned from, to;
736 {
737 Lisp_Object range;
738 int partial;
739
740 partial = (from > CHARSET_MIN_CODE (charset)
741 || to < CHARSET_MAX_CODE (charset));
742
743 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
744 {
745 int from_idx = CODE_POINT_TO_INDEX (charset, from);
746 int to_idx = CODE_POINT_TO_INDEX (charset, to);
747 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
748 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
749
750 if (CHARSET_UNIFIED_P (charset))
751 {
752 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
753 load_charset (charset, 2);
754 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
755 map_char_table_for_charset (c_function, function,
756 CHARSET_DEUNIFIER (charset), arg,
757 partial ? charset : NULL, from, to);
758 else
759 map_charset_for_dump (c_function, function, arg, from, to);
760 }
761
762 range = Fcons (make_number (from_c), make_number (to_c));
763 if (NILP (function))
764 (*c_function) (arg, range);
765 else
766 call2 (function, range, arg);
767 }
768 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
769 {
770 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_ENCODER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
778 }
779 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
780 {
781 Lisp_Object subset_info;
782 int offset;
783
784 subset_info = CHARSET_SUBSET (charset);
785 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
786 offset = XINT (AREF (subset_info, 3));
787 from -= offset;
788 if (from < XFASTINT (AREF (subset_info, 1)))
789 from = XFASTINT (AREF (subset_info, 1));
790 to -= offset;
791 if (to > XFASTINT (AREF (subset_info, 2)))
792 to = XFASTINT (AREF (subset_info, 2));
793 map_charset_chars (c_function, function, arg, charset, from, to);
794 }
795 else /* i.e. CHARSET_METHOD_SUPERSET */
796 {
797 Lisp_Object parents;
798
799 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
800 parents = XCDR (parents))
801 {
802 int offset;
803 unsigned this_from, this_to;
804
805 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
806 offset = XINT (XCDR (XCAR (parents)));
807 this_from = from - offset;
808 this_to = to - offset;
809 if (this_from < CHARSET_MIN_CODE (charset))
810 this_from = CHARSET_MIN_CODE (charset);
811 if (this_to > CHARSET_MAX_CODE (charset))
812 this_to = CHARSET_MAX_CODE (charset);
813 map_charset_chars (c_function, function, arg, charset,
814 this_from, this_to);
815 }
816 }
817 }
818
819 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
820 doc: /* Call FUNCTION for all characters in CHARSET.
821 FUNCTION is called with an argument RANGE and the optional 3rd
822 argument ARG.
823
824 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
825 characters contained in CHARSET.
826
827 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
828 range of code points of target characters. */)
829 (function, charset, arg, from_code, to_code)
830 Lisp_Object function, charset, arg, from_code, to_code;
831 {
832 struct charset *cs;
833 unsigned from, to;
834
835 CHECK_CHARSET_GET_CHARSET (charset, cs);
836 if (NILP (from_code))
837 from = CHARSET_MIN_CODE (cs);
838 else
839 {
840 CHECK_NATNUM (from_code);
841 from = XINT (from_code);
842 if (from < CHARSET_MIN_CODE (cs))
843 from = CHARSET_MIN_CODE (cs);
844 }
845 if (NILP (to_code))
846 to = CHARSET_MAX_CODE (cs);
847 else
848 {
849 CHECK_NATNUM (to_code);
850 to = XINT (to_code);
851 if (to > CHARSET_MAX_CODE (cs))
852 to = CHARSET_MAX_CODE (cs);
853 }
854 map_charset_chars (NULL, function, arg, cs, from, to);
855 return Qnil;
856 }
857
858
859 /* Define a charset according to the arguments. The Nth argument is
860 the Nth attribute of the charset (the last attribute `charset-id'
861 is not included). See the docstring of `define-charset' for the
862 detail. */
863
864 DEFUN ("define-charset-internal", Fdefine_charset_internal,
865 Sdefine_charset_internal, charset_arg_max, MANY, 0,
866 doc: /* For internal use only.
867 usage: (define-charset-internal ...) */)
868 (nargs, args)
869 int nargs;
870 Lisp_Object *args;
871 {
872 /* Charset attr vector. */
873 Lisp_Object attrs;
874 Lisp_Object val;
875 unsigned hash_code;
876 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
877 int i, j;
878 struct charset charset;
879 int id;
880 int dimension;
881 int new_definition_p;
882 int nchars;
883
884 if (nargs != charset_arg_max)
885 return Fsignal (Qwrong_number_of_arguments,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs)));
888
889 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
890
891 CHECK_SYMBOL (args[charset_arg_name]);
892 ASET (attrs, charset_name, args[charset_arg_name]);
893
894 val = args[charset_arg_code_space];
895 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
896 {
897 int min_byte, max_byte;
898
899 min_byte = XINT (Faref (val, make_number (i * 2)));
900 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
901 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
902 error ("Invalid :code-space value");
903 charset.code_space[i * 4] = min_byte;
904 charset.code_space[i * 4 + 1] = max_byte;
905 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
906 nchars *= charset.code_space[i * 4 + 2];
907 charset.code_space[i * 4 + 3] = nchars;
908 if (max_byte > 0)
909 dimension = i + 1;
910 }
911
912 val = args[charset_arg_dimension];
913 if (NILP (val))
914 charset.dimension = dimension;
915 else
916 {
917 CHECK_NATNUM (val);
918 charset.dimension = XINT (val);
919 if (charset.dimension < 1 || charset.dimension > 4)
920 args_out_of_range_3 (val, make_number (1), make_number (4));
921 }
922
923 charset.code_linear_p
924 = (charset.dimension == 1
925 || (charset.code_space[2] == 256
926 && (charset.dimension == 2
927 || (charset.code_space[6] == 256
928 && (charset.dimension == 3
929 || charset.code_space[10] == 256)))));
930
931 if (! charset.code_linear_p)
932 {
933 charset.code_space_mask = (unsigned char *) xmalloc (256);
934 bzero (charset.code_space_mask, 256);
935 for (i = 0; i < 4; i++)
936 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
937 j++)
938 charset.code_space_mask[j] |= (1 << i);
939 }
940
941 charset.iso_chars_96 = charset.code_space[2] == 96;
942
943 charset.min_code = (charset.code_space[0]
944 | (charset.code_space[4] << 8)
945 | (charset.code_space[8] << 16)
946 | (charset.code_space[12] << 24));
947 charset.max_code = (charset.code_space[1]
948 | (charset.code_space[5] << 8)
949 | (charset.code_space[9] << 16)
950 | (charset.code_space[13] << 24));
951 charset.char_index_offset = 0;
952
953 val = args[charset_arg_min_code];
954 if (! NILP (val))
955 {
956 unsigned code;
957
958 if (INTEGERP (val))
959 code = XINT (val);
960 else
961 {
962 CHECK_CONS (val);
963 CHECK_NUMBER_CAR (val);
964 CHECK_NUMBER_CDR (val);
965 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
966 }
967 if (code < charset.min_code
968 || code > charset.max_code)
969 args_out_of_range_3 (make_number (charset.min_code),
970 make_number (charset.max_code), val);
971 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
972 charset.min_code = code;
973 }
974
975 val = args[charset_arg_max_code];
976 if (! NILP (val))
977 {
978 unsigned code;
979
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
983 {
984 CHECK_CONS (val);
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
988 }
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.max_code = code;
994 }
995
996 charset.compact_codes_p = charset.max_code < 0x10000;
997
998 val = args[charset_arg_invalid_code];
999 if (NILP (val))
1000 {
1001 if (charset.min_code > 0)
1002 charset.invalid_code = 0;
1003 else
1004 {
1005 XSETINT (val, charset.max_code + 1);
1006 if (XINT (val) == charset.max_code + 1)
1007 charset.invalid_code = charset.max_code + 1;
1008 else
1009 error ("Attribute :invalid-code must be specified");
1010 }
1011 }
1012 else
1013 {
1014 CHECK_NATNUM (val);
1015 charset.invalid_code = XFASTINT (val);
1016 }
1017
1018 val = args[charset_arg_iso_final];
1019 if (NILP (val))
1020 charset.iso_final = -1;
1021 else
1022 {
1023 CHECK_NUMBER (val);
1024 if (XINT (val) < '0' || XINT (val) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val));
1026 charset.iso_final = XINT (val);
1027 }
1028
1029 val = args[charset_arg_iso_revision];
1030 if (NILP (val))
1031 charset.iso_revision = -1;
1032 else
1033 {
1034 CHECK_NUMBER (val);
1035 if (XINT (val) > 63)
1036 args_out_of_range (make_number (63), val);
1037 charset.iso_revision = XINT (val);
1038 }
1039
1040 val = args[charset_arg_emacs_mule_id];
1041 if (NILP (val))
1042 charset.emacs_mule_id = -1;
1043 else
1044 {
1045 CHECK_NATNUM (val);
1046 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val));
1048 charset.emacs_mule_id = XINT (val);
1049 }
1050
1051 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1052
1053 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1054
1055 charset.unified_p = 0;
1056
1057 bzero (charset.fast_map, sizeof (charset.fast_map));
1058
1059 if (! NILP (args[charset_arg_code_offset]))
1060 {
1061 val = args[charset_arg_code_offset];
1062 CHECK_NUMBER (val);
1063
1064 charset.method = CHARSET_METHOD_OFFSET;
1065 charset.code_offset = XINT (val);
1066
1067 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1068 charset.min_char = i + charset.code_offset;
1069 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1070 charset.max_char = i + charset.code_offset;
1071 if (charset.max_char > MAX_CHAR)
1072 error ("Unsupported max char: %d", charset.max_char);
1073
1074 i = (charset.min_char >> 7) << 7;
1075 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1076 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1077 i = (i >> 12) << 12;
1078 for (; i <= charset.max_char; i += 0x1000)
1079 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1080 }
1081 else if (! NILP (args[charset_arg_map]))
1082 {
1083 val = args[charset_arg_map];
1084 ASET (attrs, charset_map, val);
1085 charset.method = CHARSET_METHOD_MAP;
1086 }
1087 else if (! NILP (args[charset_arg_subset]))
1088 {
1089 Lisp_Object parent;
1090 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1091 struct charset *parent_charset;
1092
1093 val = args[charset_arg_subset];
1094 parent = Fcar (val);
1095 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1096 parent_min_code = Fnth (make_number (1), val);
1097 CHECK_NATNUM (parent_min_code);
1098 parent_max_code = Fnth (make_number (2), val);
1099 CHECK_NATNUM (parent_max_code);
1100 parent_code_offset = Fnth (make_number (3), val);
1101 CHECK_NUMBER (parent_code_offset);
1102 val = Fmake_vector (make_number (4), Qnil);
1103 ASET (val, 0, make_number (parent_charset->id));
1104 ASET (val, 1, parent_min_code);
1105 ASET (val, 2, parent_max_code);
1106 ASET (val, 3, parent_code_offset);
1107 ASET (attrs, charset_subset, val);
1108
1109 charset.method = CHARSET_METHOD_SUBSET;
1110 /* Here, we just copy the parent's fast_map. It's not accurate,
1111 but at least it works for quickly detecting which character
1112 DOESN'T belong to this charset. */
1113 for (i = 0; i < 190; i++)
1114 charset.fast_map[i] = parent_charset->fast_map[i];
1115
1116 /* We also copy these for parents. */
1117 charset.min_char = parent_charset->min_char;
1118 charset.max_char = parent_charset->max_char;
1119 }
1120 else if (! NILP (args[charset_arg_superset]))
1121 {
1122 val = args[charset_arg_superset];
1123 charset.method = CHARSET_METHOD_SUPERSET;
1124 val = Fcopy_sequence (val);
1125 ASET (attrs, charset_superset, val);
1126
1127 charset.min_char = MAX_CHAR;
1128 charset.max_char = 0;
1129 for (; ! NILP (val); val = Fcdr (val))
1130 {
1131 Lisp_Object elt, car_part, cdr_part;
1132 int this_id, offset;
1133 struct charset *this_charset;
1134
1135 elt = Fcar (val);
1136 if (CONSP (elt))
1137 {
1138 car_part = XCAR (elt);
1139 cdr_part = XCDR (elt);
1140 CHECK_CHARSET_GET_ID (car_part, this_id);
1141 CHECK_NUMBER (cdr_part);
1142 offset = XINT (cdr_part);
1143 }
1144 else
1145 {
1146 CHECK_CHARSET_GET_ID (elt, this_id);
1147 offset = 0;
1148 }
1149 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1150
1151 this_charset = CHARSET_FROM_ID (this_id);
1152 if (charset.min_char > this_charset->min_char)
1153 charset.min_char = this_charset->min_char;
1154 if (charset.max_char < this_charset->max_char)
1155 charset.max_char = this_charset->max_char;
1156 for (i = 0; i < 190; i++)
1157 charset.fast_map[i] |= this_charset->fast_map[i];
1158 }
1159 }
1160 else
1161 error ("None of :code-offset, :map, :parents are specified");
1162
1163 val = args[charset_arg_unify_map];
1164 if (! NILP (val) && !STRINGP (val))
1165 CHECK_VECTOR (val);
1166 ASET (attrs, charset_unify_map, val);
1167
1168 CHECK_LIST (args[charset_arg_plist]);
1169 ASET (attrs, charset_plist, args[charset_arg_plist]);
1170
1171 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1172 &hash_code);
1173 if (charset.hash_index >= 0)
1174 {
1175 new_definition_p = 0;
1176 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1177 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1178 }
1179 else
1180 {
1181 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1182 hash_code);
1183 if (charset_table_used == charset_table_size)
1184 {
1185 struct charset *new_table
1186 = (struct charset *) xmalloc (sizeof (struct charset)
1187 * (charset_table_size + 16));
1188 bcopy (charset_table, new_table,
1189 sizeof (struct charset) * charset_table_size);
1190 charset_table_size += 16;
1191 charset_table = new_table;
1192 }
1193 id = charset_table_used++;
1194 new_definition_p = 1;
1195 }
1196
1197 ASET (attrs, charset_id, make_number (id));
1198 charset.id = id;
1199 charset_table[id] = charset;
1200
1201 if (charset.method == CHARSET_METHOD_MAP)
1202 {
1203 load_charset (&charset, 0);
1204 charset_table[id] = charset;
1205 }
1206
1207 if (charset.iso_final >= 0)
1208 {
1209 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1210 charset.iso_final) = id;
1211 if (new_definition_p)
1212 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1213 Fcons (make_number (id), Qnil));
1214 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1215 charset_jisx0201_roman = id;
1216 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1217 charset_jisx0208_1978 = id;
1218 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1219 charset_jisx0208 = id;
1220 }
1221
1222 if (charset.emacs_mule_id >= 0)
1223 {
1224 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1225 if (charset.emacs_mule_id < 0xA0)
1226 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1227 else
1228 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1229 if (new_definition_p)
1230 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1231 Fcons (make_number (id), Qnil));
1232 }
1233
1234 if (new_definition_p)
1235 {
1236 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1237 if (charset.supplementary_p)
1238 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1239 Fcons (make_number (id), Qnil));
1240 else
1241 {
1242 Lisp_Object tail;
1243
1244 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1245 {
1246 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1247
1248 if (cs->supplementary_p)
1249 break;
1250 }
1251 if (EQ (tail, Vcharset_ordered_list))
1252 Vcharset_ordered_list = Fcons (make_number (id),
1253 Vcharset_ordered_list);
1254 else if (NILP (tail))
1255 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1256 Fcons (make_number (id), Qnil));
1257 else
1258 {
1259 val = Fcons (XCAR (tail), XCDR (tail));
1260 XSETCDR (tail, val);
1261 XSETCAR (tail, make_number (id));
1262 }
1263 }
1264 charset_ordered_list_tick++;
1265 }
1266
1267 return Qnil;
1268 }
1269
1270
1271 /* Same as Fdefine_charset_internal but arguments are more convenient
1272 to call from C (typically in syms_of_charset). This can define a
1273 charset of `offset' method only. Return the ID of the new
1274 charset. */
1275
1276 static int
1277 define_charset_internal (name, dimension, code_space, min_code, max_code,
1278 iso_final, iso_revision, emacs_mule_id,
1279 ascii_compatible, supplementary,
1280 code_offset)
1281 Lisp_Object name;
1282 int dimension;
1283 unsigned char *code_space;
1284 unsigned min_code, max_code;
1285 int iso_final, iso_revision, emacs_mule_id;
1286 int ascii_compatible, supplementary;
1287 int code_offset;
1288 {
1289 Lisp_Object args[charset_arg_max];
1290 Lisp_Object plist[14];
1291 Lisp_Object val;
1292 int i;
1293
1294 args[charset_arg_name] = name;
1295 args[charset_arg_dimension] = make_number (dimension);
1296 val = Fmake_vector (make_number (8), make_number (0));
1297 for (i = 0; i < 8; i++)
1298 ASET (val, i, make_number (code_space[i]));
1299 args[charset_arg_code_space] = val;
1300 args[charset_arg_min_code] = make_number (min_code);
1301 args[charset_arg_max_code] = make_number (max_code);
1302 args[charset_arg_iso_final]
1303 = (iso_final < 0 ? Qnil : make_number (iso_final));
1304 args[charset_arg_iso_revision] = make_number (iso_revision);
1305 args[charset_arg_emacs_mule_id]
1306 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1307 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1308 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1309 args[charset_arg_invalid_code] = Qnil;
1310 args[charset_arg_code_offset] = make_number (code_offset);
1311 args[charset_arg_map] = Qnil;
1312 args[charset_arg_subset] = Qnil;
1313 args[charset_arg_superset] = Qnil;
1314 args[charset_arg_unify_map] = Qnil;
1315
1316 plist[0] = intern (":name");
1317 plist[1] = args[charset_arg_name];
1318 plist[2] = intern (":dimension");
1319 plist[3] = args[charset_arg_dimension];
1320 plist[4] = intern (":code-space");
1321 plist[5] = args[charset_arg_code_space];
1322 plist[6] = intern (":iso-final-char");
1323 plist[7] = args[charset_arg_iso_final];
1324 plist[8] = intern (":emacs-mule-id");
1325 plist[9] = args[charset_arg_emacs_mule_id];
1326 plist[10] = intern (":ascii-compatible-p");
1327 plist[11] = args[charset_arg_ascii_compatible_p];
1328 plist[12] = intern (":code-offset");
1329 plist[13] = args[charset_arg_code_offset];
1330
1331 args[charset_arg_plist] = Flist (14, plist);
1332 Fdefine_charset_internal (charset_arg_max, args);
1333
1334 return XINT (CHARSET_SYMBOL_ID (name));
1335 }
1336
1337
1338 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1339 Sdefine_charset_alias, 2, 2, 0,
1340 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1341 (alias, charset)
1342 Lisp_Object alias, charset;
1343 {
1344 Lisp_Object attr;
1345
1346 CHECK_CHARSET_GET_ATTR (charset, attr);
1347 Fputhash (alias, attr, Vcharset_hash_table);
1348 Vcharset_list = Fcons (alias, Vcharset_list);
1349 return Qnil;
1350 }
1351
1352
1353 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1354 doc: /* Return the property list of CHARSET. */)
1355 (charset)
1356 Lisp_Object charset;
1357 {
1358 Lisp_Object attrs;
1359
1360 CHECK_CHARSET_GET_ATTR (charset, attrs);
1361 return CHARSET_ATTR_PLIST (attrs);
1362 }
1363
1364
1365 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1366 doc: /* Set CHARSET's property list to PLIST. */)
1367 (charset, plist)
1368 Lisp_Object charset, plist;
1369 {
1370 Lisp_Object attrs;
1371
1372 CHECK_CHARSET_GET_ATTR (charset, attrs);
1373 CHARSET_ATTR_PLIST (attrs) = plist;
1374 return plist;
1375 }
1376
1377
1378 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1379 doc: /* Unify characters of CHARSET with Unicode.
1380 This means reading the relevant file and installing the table defined
1381 by CHARSET's `:unify-map' property.
1382
1383 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1384 the same meaning as the `:unify-map' attribute in the function
1385 `define-charset' (which see).
1386
1387 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1388 (charset, unify_map, deunify)
1389 Lisp_Object charset, unify_map, deunify;
1390 {
1391 int id;
1392 struct charset *cs;
1393
1394 CHECK_CHARSET_GET_ID (charset, id);
1395 cs = CHARSET_FROM_ID (id);
1396 if (NILP (deunify)
1397 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1398 : ! CHARSET_UNIFIED_P (cs))
1399 return Qnil;
1400
1401 CHARSET_UNIFIED_P (cs) = 0;
1402 if (NILP (deunify))
1403 {
1404 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1405 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1406 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1407 if (NILP (unify_map))
1408 unify_map = CHARSET_UNIFY_MAP (cs);
1409 else
1410 {
1411 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1412 signal_error ("Bad unify-map", unify_map);
1413 CHARSET_UNIFY_MAP (cs) = unify_map;
1414 }
1415 if (NILP (Vchar_unify_table))
1416 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1417 char_table_set_range (Vchar_unify_table,
1418 cs->min_char, cs->max_char, charset);
1419 CHARSET_UNIFIED_P (cs) = 1;
1420 }
1421 else if (CHAR_TABLE_P (Vchar_unify_table))
1422 {
1423 int min_code = CHARSET_MIN_CODE (cs);
1424 int max_code = CHARSET_MAX_CODE (cs);
1425 int min_char = DECODE_CHAR (cs, min_code);
1426 int max_char = DECODE_CHAR (cs, max_code);
1427
1428 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1429 }
1430
1431 return Qnil;
1432 }
1433
1434 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1435 Sget_unused_iso_final_char, 2, 2, 0,
1436 doc: /*
1437 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1438 DIMENSION is the number of bytes to represent a character: 1 or 2.
1439 CHARS is the number of characters in a dimension: 94 or 96.
1440
1441 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1442 If there's no unused final char for the specified kind of charset,
1443 return nil. */)
1444 (dimension, chars)
1445 Lisp_Object dimension, chars;
1446 {
1447 int final_char;
1448
1449 CHECK_NUMBER (dimension);
1450 CHECK_NUMBER (chars);
1451 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1452 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1453 if (XINT (chars) != 94 && XINT (chars) != 96)
1454 args_out_of_range_3 (chars, make_number (94), make_number (96));
1455 for (final_char = '0'; final_char <= '?'; final_char++)
1456 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1457 break;
1458 return (final_char <= '?' ? make_number (final_char) : Qnil);
1459 }
1460
1461 static void
1462 check_iso_charset_parameter (dimension, chars, final_char)
1463 Lisp_Object dimension, chars, final_char;
1464 {
1465 CHECK_NATNUM (dimension);
1466 CHECK_NATNUM (chars);
1467 CHECK_NATNUM (final_char);
1468
1469 if (XINT (dimension) > 3)
1470 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1471 if (XINT (chars) != 94 && XINT (chars) != 96)
1472 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1473 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1474 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1475 }
1476
1477
1478 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1479 4, 4, 0,
1480 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1481
1482 On decoding by an ISO-2022 base coding system, when a charset
1483 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1484 if CHARSET is designated instead. */)
1485 (dimension, chars, final_char, charset)
1486 Lisp_Object dimension, chars, final_char, charset;
1487 {
1488 int id;
1489 int chars_flag;
1490
1491 CHECK_CHARSET_GET_ID (charset, id);
1492 check_iso_charset_parameter (dimension, chars, final_char);
1493 chars_flag = XINT (chars) == 96;
1494 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1495 return Qnil;
1496 }
1497
1498
1499 /* Return information about charsets in the text at PTR of NBYTES
1500 bytes, which are NCHARS characters. The value is:
1501
1502 0: Each character is represented by one byte. This is always
1503 true for a unibyte string. For a multibyte string, true if
1504 it contains only ASCII characters.
1505
1506 1: No charsets other than ascii, control-1, and latin-1 are
1507 found.
1508
1509 2: Otherwise.
1510 */
1511
1512 int
1513 string_xstring_p (string)
1514 Lisp_Object string;
1515 {
1516 const unsigned char *p = SDATA (string);
1517 const unsigned char *endp = p + SBYTES (string);
1518
1519 if (SCHARS (string) == SBYTES (string))
1520 return 0;
1521
1522 while (p < endp)
1523 {
1524 int c = STRING_CHAR_ADVANCE (p);
1525
1526 if (c >= 0x100)
1527 return 2;
1528 }
1529 return 1;
1530 }
1531
1532
1533 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1534
1535 CHARSETS is a vector. If Nth element is non-nil, it means the
1536 charset whose id is N is already found.
1537
1538 It may lookup a translation table TABLE if supplied. */
1539
1540 static void
1541 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1542 const unsigned char *ptr;
1543 EMACS_INT nchars, nbytes;
1544 Lisp_Object charsets, table;
1545 int multibyte;
1546 {
1547 const unsigned char *pend = ptr + nbytes;
1548
1549 if (nchars == nbytes)
1550 {
1551 if (multibyte)
1552 ASET (charsets, charset_ascii, Qt);
1553 else
1554 while (ptr < pend)
1555 {
1556 int c = *ptr++;
1557
1558 if (!NILP (table))
1559 c = translate_char (table, c);
1560 if (ASCII_BYTE_P (c))
1561 ASET (charsets, charset_ascii, Qt);
1562 else
1563 ASET (charsets, charset_eight_bit, Qt);
1564 }
1565 }
1566 else
1567 {
1568 while (ptr < pend)
1569 {
1570 int c = STRING_CHAR_ADVANCE (ptr);
1571 struct charset *charset;
1572
1573 if (!NILP (table))
1574 c = translate_char (table, c);
1575 charset = CHAR_CHARSET (c);
1576 ASET (charsets, CHARSET_ID (charset), Qt);
1577 }
1578 }
1579 }
1580
1581 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1582 2, 3, 0,
1583 doc: /* Return a list of charsets in the region between BEG and END.
1584 BEG and END are buffer positions.
1585 Optional arg TABLE if non-nil is a translation table to look up.
1586
1587 If the current buffer is unibyte, the returned list may contain
1588 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1589 (beg, end, table)
1590 Lisp_Object beg, end, table;
1591 {
1592 Lisp_Object charsets;
1593 EMACS_INT from, from_byte, to, stop, stop_byte;
1594 int i;
1595 Lisp_Object val;
1596 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1597
1598 validate_region (&beg, &end);
1599 from = XFASTINT (beg);
1600 stop = to = XFASTINT (end);
1601
1602 if (from < GPT && GPT < to)
1603 {
1604 stop = GPT;
1605 stop_byte = GPT_BYTE;
1606 }
1607 else
1608 stop_byte = CHAR_TO_BYTE (stop);
1609
1610 from_byte = CHAR_TO_BYTE (from);
1611
1612 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1613 while (1)
1614 {
1615 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1616 stop_byte - from_byte, charsets, table,
1617 multibyte);
1618 if (stop < to)
1619 {
1620 from = stop, from_byte = stop_byte;
1621 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1622 }
1623 else
1624 break;
1625 }
1626
1627 val = Qnil;
1628 for (i = charset_table_used - 1; i >= 0; i--)
1629 if (!NILP (AREF (charsets, i)))
1630 val = Fcons (CHARSET_NAME (charset_table + i), val);
1631 return val;
1632 }
1633
1634 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1635 1, 2, 0,
1636 doc: /* Return a list of charsets in STR.
1637 Optional arg TABLE if non-nil is a translation table to look up.
1638
1639 If STR is unibyte, the returned list may contain
1640 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1641 (str, table)
1642 Lisp_Object str, table;
1643 {
1644 Lisp_Object charsets;
1645 int i;
1646 Lisp_Object val;
1647
1648 CHECK_STRING (str);
1649
1650 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1651 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1652 charsets, table,
1653 STRING_MULTIBYTE (str));
1654 val = Qnil;
1655 for (i = charset_table_used - 1; i >= 0; i--)
1656 if (!NILP (AREF (charsets, i)))
1657 val = Fcons (CHARSET_NAME (charset_table + i), val);
1658 return val;
1659 }
1660
1661 \f
1662
1663 /* Return a unified character code for C (>= 0x110000). VAL is a
1664 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1665 charset symbol. */
1666 int
1667 maybe_unify_char (c, val)
1668 int c;
1669 Lisp_Object val;
1670 {
1671 struct charset *charset;
1672
1673 if (INTEGERP (val))
1674 return XINT (val);
1675 if (NILP (val))
1676 return c;
1677
1678 CHECK_CHARSET_GET_CHARSET (val, charset);
1679 load_charset (charset, 1);
1680 if (! inhibit_load_charset_map)
1681 {
1682 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1683 if (! NILP (val))
1684 c = XINT (val);
1685 }
1686 else
1687 {
1688 int code_index = c - CHARSET_CODE_OFFSET (charset);
1689 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1690
1691 if (unified > 0)
1692 c = unified;
1693 }
1694 return c;
1695 }
1696
1697
1698 /* Return a character correponding to the code-point CODE of
1699 CHARSET. */
1700
1701 int
1702 decode_char (charset, code)
1703 struct charset *charset;
1704 unsigned code;
1705 {
1706 int c, char_index;
1707 enum charset_method method = CHARSET_METHOD (charset);
1708
1709 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1710 return -1;
1711
1712 if (method == CHARSET_METHOD_SUBSET)
1713 {
1714 Lisp_Object subset_info;
1715
1716 subset_info = CHARSET_SUBSET (charset);
1717 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1718 code -= XINT (AREF (subset_info, 3));
1719 if (code < XFASTINT (AREF (subset_info, 1))
1720 || code > XFASTINT (AREF (subset_info, 2)))
1721 c = -1;
1722 else
1723 c = DECODE_CHAR (charset, code);
1724 }
1725 else if (method == CHARSET_METHOD_SUPERSET)
1726 {
1727 Lisp_Object parents;
1728
1729 parents = CHARSET_SUPERSET (charset);
1730 c = -1;
1731 for (; CONSP (parents); parents = XCDR (parents))
1732 {
1733 int id = XINT (XCAR (XCAR (parents)));
1734 int code_offset = XINT (XCDR (XCAR (parents)));
1735 unsigned this_code = code - code_offset;
1736
1737 charset = CHARSET_FROM_ID (id);
1738 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1739 break;
1740 }
1741 }
1742 else
1743 {
1744 char_index = CODE_POINT_TO_INDEX (charset, code);
1745 if (char_index < 0)
1746 return -1;
1747
1748 if (method == CHARSET_METHOD_MAP)
1749 {
1750 Lisp_Object decoder;
1751
1752 decoder = CHARSET_DECODER (charset);
1753 if (! VECTORP (decoder))
1754 {
1755 load_charset (charset, 1);
1756 decoder = CHARSET_DECODER (charset);
1757 }
1758 if (VECTORP (decoder))
1759 c = XINT (AREF (decoder, char_index));
1760 else
1761 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1762 }
1763 else /* method == CHARSET_METHOD_OFFSET */
1764 {
1765 c = char_index + CHARSET_CODE_OFFSET (charset);
1766 if (CHARSET_UNIFIED_P (charset)
1767 && c > MAX_UNICODE_CHAR)
1768 MAYBE_UNIFY_CHAR (c);
1769 }
1770 }
1771
1772 return c;
1773 }
1774
1775 /* Variable used temporarily by the macro ENCODE_CHAR. */
1776 Lisp_Object charset_work;
1777
1778 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1779 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1780 use CHARSET's strict_max_char instead of max_char. */
1781
1782 unsigned
1783 encode_char (charset, c)
1784 struct charset *charset;
1785 int c;
1786 {
1787 unsigned code;
1788 enum charset_method method = CHARSET_METHOD (charset);
1789
1790 if (CHARSET_UNIFIED_P (charset))
1791 {
1792 Lisp_Object deunifier, deunified;
1793 int code_index = -1;
1794
1795 deunifier = CHARSET_DEUNIFIER (charset);
1796 if (! CHAR_TABLE_P (deunifier))
1797 {
1798 load_charset (charset, 2);
1799 deunifier = CHARSET_DEUNIFIER (charset);
1800 }
1801 if (CHAR_TABLE_P (deunifier))
1802 {
1803 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1804
1805 if (INTEGERP (deunified))
1806 code_index = XINT (deunified);
1807 }
1808 else
1809 {
1810 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1811 }
1812 if (code_index >= 0)
1813 c = CHARSET_CODE_OFFSET (charset) + code_index;
1814 }
1815
1816 if (method == CHARSET_METHOD_SUBSET)
1817 {
1818 Lisp_Object subset_info;
1819 struct charset *this_charset;
1820
1821 subset_info = CHARSET_SUBSET (charset);
1822 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1823 code = ENCODE_CHAR (this_charset, c);
1824 if (code == CHARSET_INVALID_CODE (this_charset)
1825 || code < XFASTINT (AREF (subset_info, 1))
1826 || code > XFASTINT (AREF (subset_info, 2)))
1827 return CHARSET_INVALID_CODE (charset);
1828 code += XINT (AREF (subset_info, 3));
1829 return code;
1830 }
1831
1832 if (method == CHARSET_METHOD_SUPERSET)
1833 {
1834 Lisp_Object parents;
1835
1836 parents = CHARSET_SUPERSET (charset);
1837 for (; CONSP (parents); parents = XCDR (parents))
1838 {
1839 int id = XINT (XCAR (XCAR (parents)));
1840 int code_offset = XINT (XCDR (XCAR (parents)));
1841 struct charset *this_charset = CHARSET_FROM_ID (id);
1842
1843 code = ENCODE_CHAR (this_charset, c);
1844 if (code != CHARSET_INVALID_CODE (this_charset))
1845 return code + code_offset;
1846 }
1847 return CHARSET_INVALID_CODE (charset);
1848 }
1849
1850 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1851 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1852 return CHARSET_INVALID_CODE (charset);
1853
1854 if (method == CHARSET_METHOD_MAP)
1855 {
1856 Lisp_Object encoder;
1857 Lisp_Object val;
1858
1859 encoder = CHARSET_ENCODER (charset);
1860 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1861 {
1862 load_charset (charset, 2);
1863 encoder = CHARSET_ENCODER (charset);
1864 }
1865 if (CHAR_TABLE_P (encoder))
1866 {
1867 val = CHAR_TABLE_REF (encoder, c);
1868 if (NILP (val))
1869 return CHARSET_INVALID_CODE (charset);
1870 code = XINT (val);
1871 if (! CHARSET_COMPACT_CODES_P (charset))
1872 code = INDEX_TO_CODE_POINT (charset, code);
1873 }
1874 else
1875 {
1876 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1877 code = INDEX_TO_CODE_POINT (charset, code);
1878 }
1879 }
1880 else /* method == CHARSET_METHOD_OFFSET */
1881 {
1882 int code_index = c - CHARSET_CODE_OFFSET (charset);
1883
1884 code = INDEX_TO_CODE_POINT (charset, code_index);
1885 }
1886
1887 return code;
1888 }
1889
1890
1891 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1892 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1893 Return nil if CODE-POINT is not valid in CHARSET.
1894
1895 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1896
1897 Optional argument RESTRICTION specifies a way to map the pair of CCS
1898 and CODE-POINT to a character. Currently not supported and just ignored. */)
1899 (charset, code_point, restriction)
1900 Lisp_Object charset, code_point, restriction;
1901 {
1902 int c, id;
1903 unsigned code;
1904 struct charset *charsetp;
1905
1906 CHECK_CHARSET_GET_ID (charset, id);
1907 if (CONSP (code_point))
1908 {
1909 CHECK_NATNUM_CAR (code_point);
1910 CHECK_NATNUM_CDR (code_point);
1911 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1912 }
1913 else
1914 {
1915 CHECK_NATNUM (code_point);
1916 code = XINT (code_point);
1917 }
1918 charsetp = CHARSET_FROM_ID (id);
1919 c = DECODE_CHAR (charsetp, code);
1920 return (c >= 0 ? make_number (c) : Qnil);
1921 }
1922
1923
1924 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1925 doc: /* Encode the character CH into a code-point of CHARSET.
1926 Return nil if CHARSET doesn't include CH.
1927
1928 Optional argument RESTRICTION specifies a way to map CH to a
1929 code-point in CCS. Currently not supported and just ignored. */)
1930 (ch, charset, restriction)
1931 Lisp_Object ch, charset, restriction;
1932 {
1933 int id;
1934 unsigned code;
1935 struct charset *charsetp;
1936
1937 CHECK_CHARSET_GET_ID (charset, id);
1938 CHECK_NATNUM (ch);
1939 charsetp = CHARSET_FROM_ID (id);
1940 code = ENCODE_CHAR (charsetp, XINT (ch));
1941 if (code == CHARSET_INVALID_CODE (charsetp))
1942 return Qnil;
1943 if (code > 0x7FFFFFF)
1944 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1945 return make_number (code);
1946 }
1947
1948
1949 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1950 doc:
1951 /* Return a character of CHARSET whose position codes are CODEn.
1952
1953 CODE1 through CODE4 are optional, but if you don't supply sufficient
1954 position codes, it is assumed that the minimum code in each dimension
1955 is specified. */)
1956 (charset, code1, code2, code3, code4)
1957 Lisp_Object charset, code1, code2, code3, code4;
1958 {
1959 int id, dimension;
1960 struct charset *charsetp;
1961 unsigned code;
1962 int c;
1963
1964 CHECK_CHARSET_GET_ID (charset, id);
1965 charsetp = CHARSET_FROM_ID (id);
1966
1967 dimension = CHARSET_DIMENSION (charsetp);
1968 if (NILP (code1))
1969 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1970 ? 0 : CHARSET_MIN_CODE (charsetp));
1971 else
1972 {
1973 CHECK_NATNUM (code1);
1974 if (XFASTINT (code1) >= 0x100)
1975 args_out_of_range (make_number (0xFF), code1);
1976 code = XFASTINT (code1);
1977
1978 if (dimension > 1)
1979 {
1980 code <<= 8;
1981 if (NILP (code2))
1982 code |= charsetp->code_space[(dimension - 2) * 4];
1983 else
1984 {
1985 CHECK_NATNUM (code2);
1986 if (XFASTINT (code2) >= 0x100)
1987 args_out_of_range (make_number (0xFF), code2);
1988 code |= XFASTINT (code2);
1989 }
1990
1991 if (dimension > 2)
1992 {
1993 code <<= 8;
1994 if (NILP (code3))
1995 code |= charsetp->code_space[(dimension - 3) * 4];
1996 else
1997 {
1998 CHECK_NATNUM (code3);
1999 if (XFASTINT (code3) >= 0x100)
2000 args_out_of_range (make_number (0xFF), code3);
2001 code |= XFASTINT (code3);
2002 }
2003
2004 if (dimension > 3)
2005 {
2006 code <<= 8;
2007 if (NILP (code4))
2008 code |= charsetp->code_space[0];
2009 else
2010 {
2011 CHECK_NATNUM (code4);
2012 if (XFASTINT (code4) >= 0x100)
2013 args_out_of_range (make_number (0xFF), code4);
2014 code |= XFASTINT (code4);
2015 }
2016 }
2017 }
2018 }
2019 }
2020
2021 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2022 code &= 0x7F7F7F7F;
2023 c = DECODE_CHAR (charsetp, code);
2024 if (c < 0)
2025 error ("Invalid code(s)");
2026 return make_number (c);
2027 }
2028
2029
2030 /* Return the first charset in CHARSET_LIST that contains C.
2031 CHARSET_LIST is a list of charset IDs. If it is nil, use
2032 Vcharset_ordered_list. */
2033
2034 struct charset *
2035 char_charset (c, charset_list, code_return)
2036 int c;
2037 Lisp_Object charset_list;
2038 unsigned *code_return;
2039 {
2040 int maybe_null = 0;
2041
2042 if (NILP (charset_list))
2043 charset_list = Vcharset_ordered_list;
2044 else
2045 maybe_null = 1;
2046
2047 while (CONSP (charset_list))
2048 {
2049 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2050 unsigned code = ENCODE_CHAR (charset, c);
2051
2052 if (code != CHARSET_INVALID_CODE (charset))
2053 {
2054 if (code_return)
2055 *code_return = code;
2056 return charset;
2057 }
2058 charset_list = XCDR (charset_list);
2059 if (c <= MAX_UNICODE_CHAR
2060 && EQ (charset_list, Vcharset_non_preferred_head))
2061 return CHARSET_FROM_ID (charset_unicode);
2062 }
2063 return (maybe_null ? NULL
2064 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2065 : CHARSET_FROM_ID (charset_eight_bit));
2066 }
2067
2068
2069 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2070 doc:
2071 /*Return list of charset and one to four position-codes of CH.
2072 The charset is decided by the current priority order of charsets.
2073 A position-code is a byte value of each dimension of the code-point of
2074 CH in the charset. */)
2075 (ch)
2076 Lisp_Object ch;
2077 {
2078 struct charset *charset;
2079 int c, dimension;
2080 unsigned code;
2081 Lisp_Object val;
2082
2083 CHECK_CHARACTER (ch);
2084 c = XFASTINT (ch);
2085 charset = CHAR_CHARSET (c);
2086 if (! charset)
2087 abort ();
2088 code = ENCODE_CHAR (charset, c);
2089 if (code == CHARSET_INVALID_CODE (charset))
2090 abort ();
2091 dimension = CHARSET_DIMENSION (charset);
2092 for (val = Qnil; dimension > 0; dimension--)
2093 {
2094 val = Fcons (make_number (code & 0xFF), val);
2095 code >>= 8;
2096 }
2097 return Fcons (CHARSET_NAME (charset), val);
2098 }
2099
2100
2101 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
2102 doc: /* Return the charset of highest priority that contains CH. */)
2103 (ch)
2104 Lisp_Object ch;
2105 {
2106 struct charset *charset;
2107
2108 CHECK_CHARACTER (ch);
2109 charset = CHAR_CHARSET (XINT (ch));
2110 return (CHARSET_NAME (charset));
2111 }
2112
2113
2114 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2115 doc: /*
2116 Return charset of a character in the current buffer at position POS.
2117 If POS is nil, it defauls to the current point.
2118 If POS is out of range, the value is nil. */)
2119 (pos)
2120 Lisp_Object pos;
2121 {
2122 Lisp_Object ch;
2123 struct charset *charset;
2124
2125 ch = Fchar_after (pos);
2126 if (! INTEGERP (ch))
2127 return ch;
2128 charset = CHAR_CHARSET (XINT (ch));
2129 return (CHARSET_NAME (charset));
2130 }
2131
2132
2133 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2134 doc: /*
2135 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2136
2137 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2138 by their DIMENSION, CHARS, and FINAL-CHAR,
2139 whereas Emacs distinguishes them by charset symbol.
2140 See the documentation of the function `charset-info' for the meanings of
2141 DIMENSION, CHARS, and FINAL-CHAR. */)
2142 (dimension, chars, final_char)
2143 Lisp_Object dimension, chars, final_char;
2144 {
2145 int id;
2146 int chars_flag;
2147
2148 check_iso_charset_parameter (dimension, chars, final_char);
2149 chars_flag = XFASTINT (chars) == 96;
2150 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2151 XFASTINT (final_char));
2152 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2153 }
2154
2155
2156 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2157 0, 0, 0,
2158 doc: /*
2159 Internal use only.
2160 Clear temporary charset mapping tables.
2161 It should be called only from temacs invoked for dumping. */)
2162 ()
2163 {
2164 int i;
2165 struct charset *charset;
2166 Lisp_Object attrs;
2167
2168 if (temp_charset_work)
2169 {
2170 free (temp_charset_work);
2171 temp_charset_work = NULL;
2172 }
2173
2174 if (CHAR_TABLE_P (Vchar_unify_table))
2175 Foptimize_char_table (Vchar_unify_table, Qnil);
2176
2177 return Qnil;
2178 }
2179
2180 DEFUN ("charset-priority-list", Fcharset_priority_list,
2181 Scharset_priority_list, 0, 1, 0,
2182 doc: /* Return the list of charsets ordered by priority.
2183 HIGHESTP non-nil means just return the highest priority one. */)
2184 (highestp)
2185 Lisp_Object highestp;
2186 {
2187 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2188
2189 if (!NILP (highestp))
2190 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2191
2192 while (!NILP (list))
2193 {
2194 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2195 list = XCDR (list);
2196 }
2197 return Fnreverse (val);
2198 }
2199
2200 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2201 1, MANY, 0,
2202 doc: /* Assign higher priority to the charsets given as arguments.
2203 usage: (set-charset-priority &rest charsets) */)
2204 (nargs, args)
2205 int nargs;
2206 Lisp_Object *args;
2207 {
2208 Lisp_Object new_head, old_list, arglist[2];
2209 Lisp_Object list_2022, list_emacs_mule;
2210 int i, id;
2211
2212 old_list = Fcopy_sequence (Vcharset_ordered_list);
2213 new_head = Qnil;
2214 for (i = 0; i < nargs; i++)
2215 {
2216 CHECK_CHARSET_GET_ID (args[i], id);
2217 if (! NILP (Fmemq (make_number (id), old_list)))
2218 {
2219 old_list = Fdelq (make_number (id), old_list);
2220 new_head = Fcons (make_number (id), new_head);
2221 }
2222 }
2223 arglist[0] = Fnreverse (new_head);
2224 arglist[1] = Vcharset_non_preferred_head = old_list;
2225 Vcharset_ordered_list = Fnconc (2, arglist);
2226 charset_ordered_list_tick++;
2227
2228 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2229 CONSP (old_list); old_list = XCDR (old_list))
2230 {
2231 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2232 list_2022 = Fcons (XCAR (old_list), list_2022);
2233 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2234 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2235 }
2236 Viso_2022_charset_list = Fnreverse (list_2022);
2237 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2238
2239 return Qnil;
2240 }
2241
2242 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2243 0, 1, 0,
2244 doc: /* Internal use only.
2245 Return charset identification number of CHARSET. */)
2246 (charset)
2247 Lisp_Object charset;
2248 {
2249 int id;
2250
2251 CHECK_CHARSET_GET_ID (charset, id);
2252 return make_number (id);
2253 }
2254
2255 \f
2256 void
2257 init_charset ()
2258 {
2259 Lisp_Object tempdir;
2260 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2261 if (access (SDATA (tempdir), 0) < 0)
2262 {
2263 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2264 Emacs will not function correctly without the character map files.\n\
2265 Please check your installation!\n",
2266 tempdir);
2267 /* TODO should this be a fatal error? (Bug#909) */
2268 }
2269
2270 Vcharset_map_path = Fcons (tempdir, Qnil);
2271 }
2272
2273
2274 void
2275 init_charset_once ()
2276 {
2277 int i, j, k;
2278
2279 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2280 for (j = 0; j < ISO_MAX_CHARS; j++)
2281 for (k = 0; k < ISO_MAX_FINAL; k++)
2282 iso_charset_table[i][j][k] = -1;
2283
2284 for (i = 0; i < 256; i++)
2285 emacs_mule_charset[i] = NULL;
2286
2287 charset_jisx0201_roman = -1;
2288 charset_jisx0208_1978 = -1;
2289 charset_jisx0208 = -1;
2290
2291 for (i = 0; i < 128; i++)
2292 unibyte_to_multibyte_table[i] = i;
2293 for (; i < 256; i++)
2294 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2295 }
2296
2297 #ifdef emacs
2298
2299 void
2300 syms_of_charset ()
2301 {
2302 DEFSYM (Qcharsetp, "charsetp");
2303
2304 DEFSYM (Qascii, "ascii");
2305 DEFSYM (Qunicode, "unicode");
2306 DEFSYM (Qemacs, "emacs");
2307 DEFSYM (Qeight_bit, "eight-bit");
2308 DEFSYM (Qiso_8859_1, "iso-8859-1");
2309
2310 DEFSYM (Qgl, "gl");
2311 DEFSYM (Qgr, "gr");
2312
2313 staticpro (&Vcharset_ordered_list);
2314 Vcharset_ordered_list = Qnil;
2315
2316 staticpro (&Viso_2022_charset_list);
2317 Viso_2022_charset_list = Qnil;
2318
2319 staticpro (&Vemacs_mule_charset_list);
2320 Vemacs_mule_charset_list = Qnil;
2321
2322 /* Don't staticpro them here. It's done in syms_of_fns. */
2323 QCtest = intern (":test");
2324 Qeq = intern ("eq");
2325
2326 staticpro (&Vcharset_hash_table);
2327 {
2328 Lisp_Object args[2];
2329 args[0] = QCtest;
2330 args[1] = Qeq;
2331 Vcharset_hash_table = Fmake_hash_table (2, args);
2332 }
2333
2334 charset_table_size = 128;
2335 charset_table = ((struct charset *)
2336 xmalloc (sizeof (struct charset) * charset_table_size));
2337 charset_table_used = 0;
2338
2339 defsubr (&Scharsetp);
2340 defsubr (&Smap_charset_chars);
2341 defsubr (&Sdefine_charset_internal);
2342 defsubr (&Sdefine_charset_alias);
2343 defsubr (&Scharset_plist);
2344 defsubr (&Sset_charset_plist);
2345 defsubr (&Sunify_charset);
2346 defsubr (&Sget_unused_iso_final_char);
2347 defsubr (&Sdeclare_equiv_charset);
2348 defsubr (&Sfind_charset_region);
2349 defsubr (&Sfind_charset_string);
2350 defsubr (&Sdecode_char);
2351 defsubr (&Sencode_char);
2352 defsubr (&Ssplit_char);
2353 defsubr (&Smake_char);
2354 defsubr (&Schar_charset);
2355 defsubr (&Scharset_after);
2356 defsubr (&Siso_charset);
2357 defsubr (&Sclear_charset_maps);
2358 defsubr (&Scharset_priority_list);
2359 defsubr (&Sset_charset_priority);
2360 defsubr (&Scharset_id_internal);
2361
2362 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2363 doc: /* *List of directories to search for charset map files. */);
2364 Vcharset_map_path = Qnil;
2365
2366 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2367 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2368 inhibit_load_charset_map = 0;
2369
2370 DEFVAR_LISP ("charset-list", &Vcharset_list,
2371 doc: /* List of all charsets ever defined. */);
2372 Vcharset_list = Qnil;
2373
2374 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2375 doc: /* ISO639 language mnemonic symbol for the current language environment.
2376 If the current language environment is for multiple languages (e.g. "Latin-1"),
2377 the value may be a list of mnemonics. */);
2378 Vcurrent_iso639_language = Qnil;
2379
2380 charset_ascii
2381 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2382 0, 127, 'B', -1, 0, 1, 0, 0);
2383 charset_iso_8859_1
2384 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2385 0, 255, -1, -1, -1, 1, 0, 0);
2386 charset_unicode
2387 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2388 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2389 charset_emacs
2390 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2391 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2392 charset_eight_bit
2393 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2394 128, 255, -1, 0, -1, 0, 1,
2395 MAX_5_BYTE_CHAR + 1);
2396 }
2397
2398 #endif /* emacs */
2399
2400 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2401 (do not change this comment) */