Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-60
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
7 Copyright (C) 2003, 2004
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
10
11 This file is part of GNU Emacs.
12
13 GNU Emacs is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
16 any later version.
17
18 GNU Emacs is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with GNU Emacs; see the file COPYING. If not, write to
25 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 Boston, MA 02110-1301, USA. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77
78 /* The corresponding charsets. */
79 int charset_ascii;
80 int charset_eight_bit;
81 int charset_iso_8859_1;
82 int charset_unicode;
83
84 /* The other special charsets. */
85 int charset_jisx0201_roman;
86 int charset_jisx0208_1978;
87 int charset_jisx0208;
88
89 /* Value of charset attribute `charset-iso-plane'. */
90 Lisp_Object Qgl, Qgr;
91
92 /* Charset of unibyte characters. */
93 int charset_unibyte;
94
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list;
97
98 /* Incremented everytime we change Vcharset_ordered_list. This is
99 unsigned short so that it fits in Lisp_Int and never matches
100 -1. */
101 unsigned short charset_ordered_list_tick;
102
103 /* List of iso-2022 charsets. */
104 Lisp_Object Viso_2022_charset_list;
105
106 /* List of emacs-mule charsets. */
107 Lisp_Object Vemacs_mule_charset_list;
108
109 struct charset *emacs_mule_charset[256];
110
111 /* Mapping table from ISO2022's charset (specified by DIMENSION,
112 CHARS, and FINAL-CHAR) to Emacs' charset. */
113 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
114
115 Lisp_Object Vcharset_map_path;
116
117 Lisp_Object Vchar_unified_charset_table;
118
119 /* Defined in chartab.c */
120 extern void
121 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
122 Lisp_Object function, Lisp_Object table,
123 Lisp_Object arg, struct charset *charset,
124 unsigned from, unsigned to));
125
126 #define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
141 : -1)
142
143
144 /* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
146
147 #define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 << 8) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 << 16) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
159 << 24))))
160
161
162 \f
163
164 /* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
166 int charset_map_loaded;
167
168 struct charset_map_entries
169 {
170 struct {
171 unsigned from, to;
172 int c;
173 } entry[0x10000];
174 struct charset_map_entries *next;
175 };
176
177 /* Load the mapping information for CHARSET from ENTRIES.
178
179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
180
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
183
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
186 setup it too. */
187
188 static void
189 load_charset_map (charset, entries, n_entries, control_flag)
190 struct charset *charset;
191 struct charset_map_entries *entries;
192 int n_entries;
193 int control_flag;
194 {
195 Lisp_Object vec, table;
196 unsigned max_code = CHARSET_MAX_CODE (charset);
197 int ascii_compatible_p = charset->ascii_compatible_p;
198 int min_char, max_char, nonascii_min_char;
199 int i;
200 unsigned char *fast_map = charset->fast_map;
201
202 if (n_entries <= 0)
203 return;
204
205 if (control_flag > 0)
206 {
207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
208
209 table = Fmake_char_table (Qnil, Qnil);
210 if (control_flag == 1)
211 vec = Fmake_vector (make_number (n), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table))
213 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
214
215 charset_map_loaded = 1;
216 }
217
218 min_char = max_char = entries->entry[0].c;
219 nonascii_min_char = MAX_CHAR;
220 for (i = 0; i < n_entries; i++)
221 {
222 unsigned from, to;
223 int from_index, to_index;
224 int from_c, to_c;
225 int idx = i % 0x10000;
226
227 if (i > 0 && idx == 0)
228 entries = entries->next;
229 from = entries->entry[idx].from;
230 to = entries->entry[idx].to;
231 from_c = entries->entry[idx].c;
232 from_index = CODE_POINT_TO_INDEX (charset, from);
233 if (from == to)
234 {
235 to_index = from_index;
236 to_c = from_c;
237 }
238 else
239 {
240 to_index = CODE_POINT_TO_INDEX (charset, to);
241 to_c = from_c + (to_index - from_index);
242 }
243 if (from_index < 0 || to_index < 0)
244 continue;
245
246 if (control_flag < 2)
247 {
248 int c;
249
250 if (to_c > max_char)
251 max_char = to_c;
252 else if (from_c < min_char)
253 min_char = from_c;
254 if (ascii_compatible_p)
255 {
256 if (! ASCII_BYTE_P (from_c))
257 {
258 if (from_c < nonascii_min_char)
259 nonascii_min_char = from_c;
260 }
261 else if (! ASCII_BYTE_P (to_c))
262 {
263 nonascii_min_char = 0x80;
264 }
265 }
266
267 for (c = from_c; c <= to_c; c++)
268 CHARSET_FAST_MAP_SET (c, fast_map);
269
270 if (control_flag == 1)
271 {
272 unsigned code = from;
273
274 if (CHARSET_COMPACT_CODES_P (charset))
275 while (1)
276 {
277 ASET (vec, from_index, make_number (from_c));
278 if (NILP (CHAR_TABLE_REF (table, from_c)))
279 CHAR_TABLE_SET (table, from_c, make_number (code));
280 if (from_index == to_index)
281 break;
282 from_index++, from_c++;
283 code = INDEX_TO_CODE_POINT (charset, from_index);
284 }
285 else
286 for (; from_index <= to_index; from_index++, from_c++)
287 {
288 ASET (vec, from_index, make_number (from_c));
289 if (NILP (CHAR_TABLE_REF (table, from_c)))
290 CHAR_TABLE_SET (table, from_c, make_number (from_index));
291 }
292 }
293 }
294 else
295 {
296 unsigned code = from;
297
298 while (1)
299 {
300 int c1 = DECODE_CHAR (charset, code);
301
302 if (c1 >= 0)
303 {
304 CHAR_TABLE_SET (table, from_c, make_number (c1));
305 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
306 if (CHAR_TABLE_P (Vchar_unified_charset_table))
307 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
308 CHARSET_NAME (charset));
309 }
310 if (from_index == to_index)
311 break;
312 from_index++, from_c++;
313 code = INDEX_TO_CODE_POINT (charset, from_index);
314 }
315 }
316 }
317
318 if (control_flag < 2)
319 {
320 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
321 ? nonascii_min_char : min_char);
322 CHARSET_MAX_CHAR (charset) = max_char;
323 if (control_flag == 1)
324 {
325 CHARSET_DECODER (charset) = vec;
326 CHARSET_ENCODER (charset) = table;
327 }
328 }
329 else
330 CHARSET_DEUNIFIER (charset) = table;
331 }
332
333
334 /* Read a hexadecimal number (preceded by "0x") from the file FP while
335 paying attention to comment charcter '#'. */
336
337 static INLINE unsigned
338 read_hex (fp, eof)
339 FILE *fp;
340 int *eof;
341 {
342 int c;
343 unsigned n;
344
345 while ((c = getc (fp)) != EOF)
346 {
347 if (c == '#')
348 {
349 while ((c = getc (fp)) != EOF && c != '\n');
350 }
351 else if (c == '0')
352 {
353 if ((c = getc (fp)) == EOF || c == 'x')
354 break;
355 }
356 }
357 if (c == EOF)
358 {
359 *eof = 1;
360 return 0;
361 }
362 *eof = 0;
363 n = 0;
364 if (c == 'x')
365 while ((c = getc (fp)) != EOF && isxdigit (c))
366 n = ((n << 4)
367 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
368 else
369 while ((c = getc (fp)) != EOF && isdigit (c))
370 n = (n * 10) + c - '0';
371 if (c != EOF)
372 ungetc (c, fp);
373 return n;
374 }
375
376
377 /* Return a mapping vector for CHARSET loaded from MAPFILE.
378 Each line of MAPFILE has this form
379 0xAAAA 0xCCCC
380 where 0xAAAA is a code-point and 0xCCCC is the corresponding
381 character code, or this form
382 0xAAAA-0xBBBB 0xCCCC
383 where 0xAAAA and 0xBBBB are code-points specifying a range, and
384 0xCCCC is the first character code of the range.
385
386 The returned vector has this form:
387 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
388 where CODE1 is a code-point or a cons of code-points specifying a
389 range. */
390
391 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
392
393 static void
394 load_charset_map_from_file (charset, mapfile, control_flag)
395 struct charset *charset;
396 Lisp_Object mapfile;
397 int control_flag;
398 {
399 unsigned min_code = CHARSET_MIN_CODE (charset);
400 unsigned max_code = CHARSET_MAX_CODE (charset);
401 int fd;
402 FILE *fp;
403 int eof;
404 Lisp_Object suffixes;
405 struct charset_map_entries *head, *entries;
406 int n_entries;
407
408 suffixes = Fcons (build_string (".map"),
409 Fcons (build_string (".TXT"), Qnil));
410
411 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
412 if (fd < 0
413 || ! (fp = fdopen (fd, "r")))
414 {
415 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
416 return;
417 }
418
419 head = entries = ((struct charset_map_entries *)
420 alloca (sizeof (struct charset_map_entries)));
421 n_entries = 0;
422 eof = 0;
423 while (1)
424 {
425 unsigned from, to;
426 int c;
427 int idx;
428
429 from = read_hex (fp, &eof);
430 if (eof)
431 break;
432 if (getc (fp) == '-')
433 to = read_hex (fp, &eof);
434 else
435 to = from;
436 c = (int) read_hex (fp, &eof);
437
438 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
439 continue;
440
441 if (n_entries > 0 && (n_entries % 0x10000) == 0)
442 {
443 entries->next = ((struct charset_map_entries *)
444 alloca (sizeof (struct charset_map_entries)));
445 entries = entries->next;
446 }
447 idx = n_entries % 0x10000;
448 entries->entry[idx].from = from;
449 entries->entry[idx].to = to;
450 entries->entry[idx].c = c;
451 n_entries++;
452 }
453 fclose (fp);
454 close (fd);
455
456 load_charset_map (charset, head, n_entries, control_flag);
457 }
458
459 static void
460 load_charset_map_from_vector (charset, vec, control_flag)
461 struct charset *charset;
462 Lisp_Object vec;
463 int control_flag;
464 {
465 unsigned min_code = CHARSET_MIN_CODE (charset);
466 unsigned max_code = CHARSET_MAX_CODE (charset);
467 struct charset_map_entries *head, *entries;
468 int n_entries;
469 int len = ASIZE (vec);
470 int i;
471
472 if (len % 2 == 1)
473 {
474 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
475 return;
476 }
477
478 head = entries = ((struct charset_map_entries *)
479 alloca (sizeof (struct charset_map_entries)));
480 n_entries = 0;
481 for (i = 0; i < len; i += 2)
482 {
483 Lisp_Object val, val2;
484 unsigned from, to;
485 int c;
486 int idx;
487
488 val = AREF (vec, i);
489 if (CONSP (val))
490 {
491 val2 = XCDR (val);
492 val = XCAR (val);
493 CHECK_NATNUM (val);
494 CHECK_NATNUM (val2);
495 from = XFASTINT (val);
496 to = XFASTINT (val2);
497 }
498 else
499 {
500 CHECK_NATNUM (val);
501 from = to = XFASTINT (val);
502 }
503 val = AREF (vec, i + 1);
504 CHECK_NATNUM (val);
505 c = XFASTINT (val);
506
507 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
508 continue;
509
510 if (n_entries > 0 && (n_entries % 0x10000) == 0)
511 {
512 entries->next = ((struct charset_map_entries *)
513 alloca (sizeof (struct charset_map_entries)));
514 entries = entries->next;
515 }
516 idx = n_entries % 0x10000;
517 entries->entry[idx].from = from;
518 entries->entry[idx].to = to;
519 entries->entry[idx].c = c;
520 n_entries++;
521 }
522
523 load_charset_map (charset, head, n_entries, control_flag);
524 }
525
526 static void
527 load_charset (charset)
528 struct charset *charset;
529 {
530 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
531 {
532 Lisp_Object map;
533
534 map = CHARSET_MAP (charset);
535 if (STRINGP (map))
536 load_charset_map_from_file (charset, map, 1);
537 else
538 load_charset_map_from_vector (charset, map, 1);
539 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
540 }
541 }
542
543
544 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
545 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
546 (object)
547 Lisp_Object object;
548 {
549 return (CHARSETP (object) ? Qt : Qnil);
550 }
551
552
553 void
554 map_charset_chars (c_function, function, arg,
555 charset, from, to)
556 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
557 Lisp_Object function, arg;
558 struct charset *charset;
559 unsigned from, to;
560 {
561 Lisp_Object range;
562 int partial;
563
564 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
565 load_charset (charset);
566
567 partial = (from > CHARSET_MIN_CODE (charset)
568 || to < CHARSET_MAX_CODE (charset));
569
570 if (CHARSET_UNIFIED_P (charset)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
572 {
573 map_char_table_for_charset (c_function, function,
574 CHARSET_DEUNIFIER (charset), arg,
575 partial ? charset : NULL, from, to);
576 }
577
578 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
579 {
580 int from_idx = CODE_POINT_TO_INDEX (charset, from);
581 int to_idx = CODE_POINT_TO_INDEX (charset, to);
582 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
583 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
584
585 range = Fcons (make_number (from_c), make_number (to_c));
586 if (NILP (function))
587 (*c_function) (arg, range);
588 else
589 call2 (function, range, arg);
590 }
591 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
592 {
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
594 return;
595 map_char_table_for_charset (c_function, function,
596 CHARSET_ENCODER (charset), arg,
597 partial ? charset : NULL, from, to);
598 }
599 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
600 {
601 Lisp_Object subset_info;
602 int offset;
603
604 subset_info = CHARSET_SUBSET (charset);
605 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
606 offset = XINT (AREF (subset_info, 3));
607 from -= offset;
608 if (from < XFASTINT (AREF (subset_info, 1)))
609 from = XFASTINT (AREF (subset_info, 1));
610 to -= offset;
611 if (to > XFASTINT (AREF (subset_info, 2)))
612 to = XFASTINT (AREF (subset_info, 2));
613 map_charset_chars (c_function, function, arg, charset, from, to);
614 }
615 else /* i.e. CHARSET_METHOD_SUPERSET */
616 {
617 Lisp_Object parents;
618
619 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
620 parents = XCDR (parents))
621 {
622 int offset;
623 unsigned this_from, this_to;
624
625 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
626 offset = XINT (XCDR (XCAR (parents)));
627 this_from = from - offset;
628 this_to = to - offset;
629 if (this_from < CHARSET_MIN_CODE (charset))
630 this_from = CHARSET_MIN_CODE (charset);
631 if (this_to > CHARSET_MAX_CODE (charset))
632 this_to = CHARSET_MAX_CODE (charset);
633 map_charset_chars (c_function, function, arg, charset,
634 this_from, this_to);
635 }
636 }
637 }
638
639 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
640 doc: /* Call FUNCTION for all characters in CHARSET.
641 FUNCTION is called with an argument RANGE and the optional 3rd
642 argument ARG.
643
644 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
645 characters contained in CHARSET.
646
647 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
648 range of code points of target characters. */)
649 (function, charset, arg, from_code, to_code)
650 Lisp_Object function, charset, arg, from_code, to_code;
651 {
652 struct charset *cs;
653 unsigned from, to;
654
655 CHECK_CHARSET_GET_CHARSET (charset, cs);
656 if (NILP (from_code))
657 from = CHARSET_MIN_CODE (cs);
658 else
659 {
660 CHECK_NATNUM (from_code);
661 from = XINT (from_code);
662 if (from < CHARSET_MIN_CODE (cs))
663 from = CHARSET_MIN_CODE (cs);
664 }
665 if (NILP (to_code))
666 to = CHARSET_MAX_CODE (cs);
667 else
668 {
669 CHECK_NATNUM (to_code);
670 to = XINT (to_code);
671 if (to > CHARSET_MAX_CODE (cs))
672 to = CHARSET_MAX_CODE (cs);
673 }
674 map_charset_chars (NULL, function, arg, cs, from, to);
675 return Qnil;
676 }
677
678
679 /* Define a charset according to the arguments. The Nth argument is
680 the Nth attribute of the charset (the last attribute `charset-id'
681 is not included). See the docstring of `define-charset' for the
682 detail. */
683
684 DEFUN ("define-charset-internal", Fdefine_charset_internal,
685 Sdefine_charset_internal, charset_arg_max, MANY, 0,
686 doc: /* For internal use only.
687 usage: (define-charset-internal ...) */)
688 (nargs, args)
689 int nargs;
690 Lisp_Object *args;
691 {
692 /* Charset attr vector. */
693 Lisp_Object attrs;
694 Lisp_Object val;
695 unsigned hash_code;
696 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
697 int i, j;
698 struct charset charset;
699 int id;
700 int dimension;
701 int new_definition_p;
702 int nchars;
703
704 if (nargs != charset_arg_max)
705 return Fsignal (Qwrong_number_of_arguments,
706 Fcons (intern ("define-charset-internal"),
707 make_number (nargs)));
708
709 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
710
711 CHECK_SYMBOL (args[charset_arg_name]);
712 ASET (attrs, charset_name, args[charset_arg_name]);
713
714 val = args[charset_arg_code_space];
715 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
716 {
717 int min_byte, max_byte;
718
719 min_byte = XINT (Faref (val, make_number (i * 2)));
720 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
721 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
722 error ("Invalid :code-space value");
723 charset.code_space[i * 4] = min_byte;
724 charset.code_space[i * 4 + 1] = max_byte;
725 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
726 nchars *= charset.code_space[i * 4 + 2];
727 charset.code_space[i * 4 + 3] = nchars;
728 if (max_byte > 0)
729 dimension = i + 1;
730 }
731
732 val = args[charset_arg_dimension];
733 if (NILP (val))
734 charset.dimension = dimension;
735 else
736 {
737 CHECK_NATNUM (val);
738 charset.dimension = XINT (val);
739 if (charset.dimension < 1 || charset.dimension > 4)
740 args_out_of_range_3 (val, make_number (1), make_number (4));
741 }
742
743 charset.code_linear_p
744 = (charset.dimension == 1
745 || (charset.code_space[2] == 256
746 && (charset.dimension == 2
747 || (charset.code_space[6] == 256
748 && (charset.dimension == 3
749 || charset.code_space[10] == 256)))));
750
751 if (! charset.code_linear_p)
752 {
753 charset.code_space_mask = (unsigned char *) xmalloc (256);
754 bzero (charset.code_space_mask, 256);
755 for (i = 0; i < 4; i++)
756 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
757 j++)
758 charset.code_space_mask[j] |= (1 << i);
759 }
760
761 charset.iso_chars_96 = charset.code_space[2] == 96;
762
763 charset.min_code = (charset.code_space[0]
764 | (charset.code_space[4] << 8)
765 | (charset.code_space[8] << 16)
766 | (charset.code_space[12] << 24));
767 charset.max_code = (charset.code_space[1]
768 | (charset.code_space[5] << 8)
769 | (charset.code_space[9] << 16)
770 | (charset.code_space[13] << 24));
771 charset.char_index_offset = 0;
772
773 val = args[charset_arg_min_code];
774 if (! NILP (val))
775 {
776 unsigned code;
777
778 if (INTEGERP (val))
779 code = XINT (val);
780 else
781 {
782 CHECK_CONS (val);
783 CHECK_NUMBER_CAR (val);
784 CHECK_NUMBER_CDR (val);
785 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
786 }
787 if (code < charset.min_code
788 || code > charset.max_code)
789 args_out_of_range_3 (make_number (charset.min_code),
790 make_number (charset.max_code), val);
791 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
792 charset.min_code = code;
793 }
794
795 val = args[charset_arg_max_code];
796 if (! NILP (val))
797 {
798 unsigned code;
799
800 if (INTEGERP (val))
801 code = XINT (val);
802 else
803 {
804 CHECK_CONS (val);
805 CHECK_NUMBER_CAR (val);
806 CHECK_NUMBER_CDR (val);
807 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
808 }
809 if (code < charset.min_code
810 || code > charset.max_code)
811 args_out_of_range_3 (make_number (charset.min_code),
812 make_number (charset.max_code), val);
813 charset.max_code = code;
814 }
815
816 charset.compact_codes_p = charset.max_code < 0x1000000;
817
818 val = args[charset_arg_invalid_code];
819 if (NILP (val))
820 {
821 if (charset.min_code > 0)
822 charset.invalid_code = 0;
823 else
824 {
825 XSETINT (val, charset.max_code + 1);
826 if (XINT (val) == charset.max_code + 1)
827 charset.invalid_code = charset.max_code + 1;
828 else
829 error ("Attribute :invalid-code must be specified");
830 }
831 }
832 else
833 {
834 CHECK_NATNUM (val);
835 charset.invalid_code = XFASTINT (val);
836 }
837
838 val = args[charset_arg_iso_final];
839 if (NILP (val))
840 charset.iso_final = -1;
841 else
842 {
843 CHECK_NUMBER (val);
844 if (XINT (val) < '0' || XINT (val) > 127)
845 error ("Invalid iso-final-char: %d", XINT (val));
846 charset.iso_final = XINT (val);
847 }
848
849 val = args[charset_arg_iso_revision];
850 if (NILP (val))
851 charset.iso_revision = -1;
852 else
853 {
854 CHECK_NUMBER (val);
855 if (XINT (val) > 63)
856 args_out_of_range (make_number (63), val);
857 charset.iso_revision = XINT (val);
858 }
859
860 val = args[charset_arg_emacs_mule_id];
861 if (NILP (val))
862 charset.emacs_mule_id = -1;
863 else
864 {
865 CHECK_NATNUM (val);
866 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
867 error ("Invalid emacs-mule-id: %d", XINT (val));
868 charset.emacs_mule_id = XINT (val);
869 }
870
871 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
872
873 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
874
875 charset.unified_p = 0;
876
877 bzero (charset.fast_map, sizeof (charset.fast_map));
878
879 if (! NILP (args[charset_arg_code_offset]))
880 {
881 val = args[charset_arg_code_offset];
882 CHECK_NUMBER (val);
883
884 charset.method = CHARSET_METHOD_OFFSET;
885 charset.code_offset = XINT (val);
886
887 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
888 charset.min_char = i + charset.code_offset;
889 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
890 charset.max_char = i + charset.code_offset;
891 if (charset.max_char > MAX_CHAR)
892 error ("Unsupported max char: %d", charset.max_char);
893
894 i = (charset.min_char >> 7) << 7;
895 for (; i < 0x10000 && i <= charset.max_char; i += 128)
896 CHARSET_FAST_MAP_SET (i, charset.fast_map);
897 i = (i >> 12) << 12;
898 for (; i <= charset.max_char; i += 0x1000)
899 CHARSET_FAST_MAP_SET (i, charset.fast_map);
900 }
901 else if (! NILP (args[charset_arg_map]))
902 {
903 val = args[charset_arg_map];
904 ASET (attrs, charset_map, val);
905 if (STRINGP (val))
906 load_charset_map_from_file (&charset, val, 0);
907 else
908 load_charset_map_from_vector (&charset, val, 0);
909 charset.method = CHARSET_METHOD_MAP_DEFERRED;
910 }
911 else if (! NILP (args[charset_arg_subset]))
912 {
913 Lisp_Object parent;
914 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
915 struct charset *parent_charset;
916
917 val = args[charset_arg_subset];
918 parent = Fcar (val);
919 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
920 parent_min_code = Fnth (make_number (1), val);
921 CHECK_NATNUM (parent_min_code);
922 parent_max_code = Fnth (make_number (2), val);
923 CHECK_NATNUM (parent_max_code);
924 parent_code_offset = Fnth (make_number (3), val);
925 CHECK_NUMBER (parent_code_offset);
926 val = Fmake_vector (make_number (4), Qnil);
927 ASET (val, 0, make_number (parent_charset->id));
928 ASET (val, 1, parent_min_code);
929 ASET (val, 2, parent_max_code);
930 ASET (val, 3, parent_code_offset);
931 ASET (attrs, charset_subset, val);
932
933 charset.method = CHARSET_METHOD_SUBSET;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i = 0; i < 190; i++)
938 charset.fast_map[i] = parent_charset->fast_map[i];
939
940 /* We also copy these for parents. */
941 charset.min_char = parent_charset->min_char;
942 charset.max_char = parent_charset->max_char;
943 }
944 else if (! NILP (args[charset_arg_superset]))
945 {
946 val = args[charset_arg_superset];
947 charset.method = CHARSET_METHOD_SUPERSET;
948 val = Fcopy_sequence (val);
949 ASET (attrs, charset_superset, val);
950
951 charset.min_char = MAX_CHAR;
952 charset.max_char = 0;
953 for (; ! NILP (val); val = Fcdr (val))
954 {
955 Lisp_Object elt, car_part, cdr_part;
956 int this_id, offset;
957 struct charset *this_charset;
958
959 elt = Fcar (val);
960 if (CONSP (elt))
961 {
962 car_part = XCAR (elt);
963 cdr_part = XCDR (elt);
964 CHECK_CHARSET_GET_ID (car_part, this_id);
965 CHECK_NUMBER (cdr_part);
966 offset = XINT (cdr_part);
967 }
968 else
969 {
970 CHECK_CHARSET_GET_ID (elt, this_id);
971 offset = 0;
972 }
973 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
974
975 this_charset = CHARSET_FROM_ID (this_id);
976 if (charset.min_char > this_charset->min_char)
977 charset.min_char = this_charset->min_char;
978 if (charset.max_char < this_charset->max_char)
979 charset.max_char = this_charset->max_char;
980 for (i = 0; i < 190; i++)
981 charset.fast_map[i] |= this_charset->fast_map[i];
982 }
983 }
984 else
985 error ("None of :code-offset, :map, :parents are specified");
986
987 val = args[charset_arg_unify_map];
988 if (! NILP (val) && !STRINGP (val))
989 CHECK_VECTOR (val);
990 ASET (attrs, charset_unify_map, val);
991
992 CHECK_LIST (args[charset_arg_plist]);
993 ASET (attrs, charset_plist, args[charset_arg_plist]);
994
995 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
996 &hash_code);
997 if (charset.hash_index >= 0)
998 {
999 new_definition_p = 0;
1000 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1001 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1002 }
1003 else
1004 {
1005 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1006 hash_code);
1007 if (charset_table_used == charset_table_size)
1008 {
1009 struct charset *new_table
1010 = (struct charset *) xmalloc (sizeof (struct charset)
1011 * (charset_table_size + 16));
1012 bcopy (charset_table, new_table,
1013 sizeof (struct charset) * charset_table_size);
1014 charset_table_size += 16;
1015 charset_table = new_table;
1016 }
1017 id = charset_table_used++;
1018 new_definition_p = 1;
1019 }
1020
1021 ASET (attrs, charset_id, make_number (id));
1022 charset.id = id;
1023 charset_table[id] = charset;
1024
1025 if (charset.iso_final >= 0)
1026 {
1027 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1028 charset.iso_final) = id;
1029 if (new_definition_p)
1030 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1031 Fcons (make_number (id), Qnil));
1032 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1033 charset_jisx0201_roman = id;
1034 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1035 charset_jisx0208_1978 = id;
1036 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1037 charset_jisx0208 = id;
1038 }
1039
1040 if (charset.emacs_mule_id >= 0)
1041 {
1042 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1043 if (charset.emacs_mule_id < 0xA0)
1044 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1045 else
1046 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1047 if (new_definition_p)
1048 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1049 Fcons (make_number (id), Qnil));
1050 }
1051
1052 if (new_definition_p)
1053 {
1054 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1055 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1056 Fcons (make_number (id), Qnil));
1057 charset_ordered_list_tick++;
1058 }
1059
1060 return Qnil;
1061 }
1062
1063
1064 /* Same as Fdefine_charset_internal but arguments are more convenient
1065 to call from C (typically in syms_of_charset). This can define a
1066 charset of `offset' method only. Return the ID of the new
1067 charset. */
1068
1069 static int
1070 define_charset_internal (name, dimension, code_space, min_code, max_code,
1071 iso_final, iso_revision, emacs_mule_id,
1072 ascii_compatible, supplementary,
1073 code_offset)
1074 Lisp_Object name;
1075 int dimension;
1076 unsigned char *code_space;
1077 unsigned min_code, max_code;
1078 int iso_final, iso_revision, emacs_mule_id;
1079 int ascii_compatible, supplementary;
1080 int code_offset;
1081 {
1082 Lisp_Object args[charset_arg_max];
1083 Lisp_Object plist[14];
1084 Lisp_Object val;
1085 int i;
1086
1087 args[charset_arg_name] = name;
1088 args[charset_arg_dimension] = make_number (dimension);
1089 val = Fmake_vector (make_number (8), make_number (0));
1090 for (i = 0; i < 8; i++)
1091 ASET (val, i, make_number (code_space[i]));
1092 args[charset_arg_code_space] = val;
1093 args[charset_arg_min_code] = make_number (min_code);
1094 args[charset_arg_max_code] = make_number (max_code);
1095 args[charset_arg_iso_final]
1096 = (iso_final < 0 ? Qnil : make_number (iso_final));
1097 args[charset_arg_iso_revision] = make_number (iso_revision);
1098 args[charset_arg_emacs_mule_id]
1099 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1100 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1101 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1102 args[charset_arg_invalid_code] = Qnil;
1103 args[charset_arg_code_offset] = make_number (code_offset);
1104 args[charset_arg_map] = Qnil;
1105 args[charset_arg_subset] = Qnil;
1106 args[charset_arg_superset] = Qnil;
1107 args[charset_arg_unify_map] = Qnil;
1108
1109 plist[0] = intern (":name");
1110 plist[1] = args[charset_arg_name];
1111 plist[2] = intern (":dimension");
1112 plist[3] = args[charset_arg_dimension];
1113 plist[4] = intern (":code-space");
1114 plist[5] = args[charset_arg_code_space];
1115 plist[6] = intern (":iso-final-char");
1116 plist[7] = args[charset_arg_iso_final];
1117 plist[8] = intern (":emacs-mule-id");
1118 plist[9] = args[charset_arg_emacs_mule_id];
1119 plist[10] = intern (":ascii-compatible-p");
1120 plist[11] = args[charset_arg_ascii_compatible_p];
1121 plist[12] = intern (":code-offset");
1122 plist[13] = args[charset_arg_code_offset];
1123
1124 args[charset_arg_plist] = Flist (14, plist);
1125 Fdefine_charset_internal (charset_arg_max, args);
1126
1127 return XINT (CHARSET_SYMBOL_ID (name));
1128 }
1129
1130
1131 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1132 Sdefine_charset_alias, 2, 2, 0,
1133 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1134 (alias, charset)
1135 Lisp_Object alias, charset;
1136 {
1137 Lisp_Object attr;
1138
1139 CHECK_CHARSET_GET_ATTR (charset, attr);
1140 Fputhash (alias, attr, Vcharset_hash_table);
1141 Vcharset_list = Fcons (alias, Vcharset_list);
1142 return Qnil;
1143 }
1144
1145
1146 DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
1147 doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1148 ()
1149 {
1150 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
1151 }
1152
1153
1154 DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
1155 1, 1, 0,
1156 doc: /* Set the unibyte charset to CHARSET.
1157 This determines how unibyte/multibyte conversion is done. See also
1158 function `unibyte-charset'. */)
1159 (charset)
1160 Lisp_Object charset;
1161 {
1162 struct charset *cs;
1163 int i, c;
1164
1165 CHECK_CHARSET_GET_CHARSET (charset, cs);
1166 if (! cs->ascii_compatible_p
1167 || cs->dimension != 1)
1168 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
1169 charset_unibyte = cs->id;
1170 memset (unibyte_has_multibyte_table, 1, 128);
1171 for (i = 128; i < 256; i++)
1172 {
1173 c = DECODE_CHAR (cs, i);
1174 unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
1175 unibyte_has_multibyte_table[i] = c >= 0;
1176 }
1177
1178 return Qnil;
1179 }
1180
1181
1182 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1183 doc: /* Return the property list of CHARSET. */)
1184 (charset)
1185 Lisp_Object charset;
1186 {
1187 Lisp_Object attrs;
1188
1189 CHECK_CHARSET_GET_ATTR (charset, attrs);
1190 return CHARSET_ATTR_PLIST (attrs);
1191 }
1192
1193
1194 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1195 doc: /* Set CHARSET's property list to PLIST. */)
1196 (charset, plist)
1197 Lisp_Object charset, plist;
1198 {
1199 Lisp_Object attrs;
1200
1201 CHECK_CHARSET_GET_ATTR (charset, attrs);
1202 CHARSET_ATTR_PLIST (attrs) = plist;
1203 return plist;
1204 }
1205
1206
1207 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1208 doc: /* Unify characters of CHARSET with Unicode.
1209 This means reading the relevant file and installing the table defined
1210 by CHARSET's `:unify-map' property.
1211
1212 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1213 the same meaning as the `:unify-map' attribute in the function
1214 `define-charset' (which see).
1215
1216 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1217 (charset, unify_map, deunify)
1218 Lisp_Object charset, unify_map, deunify;
1219 {
1220 int id;
1221 struct charset *cs;
1222
1223 CHECK_CHARSET_GET_ID (charset, id);
1224 cs = CHARSET_FROM_ID (id);
1225 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1226 load_charset (cs);
1227 if (NILP (deunify)
1228 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1229 : ! CHARSET_UNIFIED_P (cs))
1230 return Qnil;
1231
1232 CHARSET_UNIFIED_P (cs) = 0;
1233 if (NILP (deunify))
1234 {
1235 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1236 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1237 if (NILP (unify_map))
1238 unify_map = CHARSET_UNIFY_MAP (cs);
1239 if (STRINGP (unify_map))
1240 load_charset_map_from_file (cs, unify_map, 2);
1241 else if (VECTORP (unify_map))
1242 load_charset_map_from_vector (cs, unify_map, 2);
1243 else if (NILP (unify_map))
1244 error ("No unify-map for charset");
1245 else
1246 error ("Bad unify-map arg");
1247 CHARSET_UNIFIED_P (cs) = 1;
1248 }
1249 else if (CHAR_TABLE_P (Vchar_unify_table))
1250 {
1251 int min_code = CHARSET_MIN_CODE (cs);
1252 int max_code = CHARSET_MAX_CODE (cs);
1253 int min_char = DECODE_CHAR (cs, min_code);
1254 int max_char = DECODE_CHAR (cs, max_code);
1255
1256 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1257 }
1258
1259 return Qnil;
1260 }
1261
1262 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1263 Sget_unused_iso_final_char, 2, 2, 0,
1264 doc: /*
1265 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1266 DIMENSION is the number of bytes to represent a character: 1 or 2.
1267 CHARS is the number of characters in a dimension: 94 or 96.
1268
1269 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1270 If there's no unused final char for the specified kind of charset,
1271 return nil. */)
1272 (dimension, chars)
1273 Lisp_Object dimension, chars;
1274 {
1275 int final_char;
1276
1277 CHECK_NUMBER (dimension);
1278 CHECK_NUMBER (chars);
1279 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1280 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1281 if (XINT (chars) != 94 && XINT (chars) != 96)
1282 args_out_of_range_3 (chars, make_number (94), make_number (96));
1283 for (final_char = '0'; final_char <= '?'; final_char++)
1284 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1285 break;
1286 return (final_char <= '?' ? make_number (final_char) : Qnil);
1287 }
1288
1289 static void
1290 check_iso_charset_parameter (dimension, chars, final_char)
1291 Lisp_Object dimension, chars, final_char;
1292 {
1293 CHECK_NATNUM (dimension);
1294 CHECK_NATNUM (chars);
1295 CHECK_NATNUM (final_char);
1296
1297 if (XINT (dimension) > 3)
1298 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1299 if (XINT (chars) != 94 && XINT (chars) != 96)
1300 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1301 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1302 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1303 }
1304
1305
1306 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1307 4, 4, 0,
1308 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1309
1310 On decoding by an ISO-2022 base coding system, when a charset
1311 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1312 if CHARSET is designated instead. */)
1313 (dimension, chars, final_char, charset)
1314 Lisp_Object dimension, chars, final_char, charset;
1315 {
1316 int id;
1317 int chars_flag;
1318
1319 CHECK_CHARSET_GET_ID (charset, id);
1320 check_iso_charset_parameter (dimension, chars, final_char);
1321 chars_flag = XINT (chars) == 96;
1322 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1323 return Qnil;
1324 }
1325
1326
1327 /* Return information about charsets in the text at PTR of NBYTES
1328 bytes, which are NCHARS characters. The value is:
1329
1330 0: Each character is represented by one byte. This is always
1331 true for a unibyte string. For a multibyte string, true if
1332 it contains only ASCII characters.
1333
1334 1: No charsets other than ascii, control-1, and latin-1 are
1335 found.
1336
1337 2: Otherwise.
1338 */
1339
1340 int
1341 string_xstring_p (string)
1342 Lisp_Object string;
1343 {
1344 const unsigned char *p = SDATA (string);
1345 const unsigned char *endp = p + SBYTES (string);
1346
1347 if (SCHARS (string) == SBYTES (string))
1348 return 0;
1349
1350 while (p < endp)
1351 {
1352 int c = STRING_CHAR_ADVANCE (p);
1353
1354 if (c >= 0x100)
1355 return 2;
1356 }
1357 return 1;
1358 }
1359
1360
1361 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1362
1363 CHARSETS is a vector. If Nth element is non-nil, it means the
1364 charset whose id is N is already found.
1365
1366 It may lookup a translation table TABLE if supplied. */
1367
1368 static void
1369 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1370 const unsigned char *ptr;
1371 EMACS_INT nchars, nbytes;
1372 Lisp_Object charsets, table;
1373 int multibyte;
1374 {
1375 const unsigned char *pend = ptr + nbytes;
1376
1377 if (nchars == nbytes)
1378 {
1379 if (multibyte)
1380 ASET (charsets, charset_ascii, Qt);
1381 else
1382 while (ptr < pend)
1383 {
1384 int c = *ptr++;
1385
1386 if (!NILP (table))
1387 c = translate_char (table, c);
1388 if (ASCII_BYTE_P (c))
1389 ASET (charsets, charset_ascii, Qt);
1390 else
1391 ASET (charsets, charset_eight_bit, Qt);
1392 }
1393 }
1394 else
1395 {
1396 while (ptr < pend)
1397 {
1398 int c = STRING_CHAR_ADVANCE (ptr);
1399 struct charset *charset;
1400
1401 if (!NILP (table))
1402 c = translate_char (table, c);
1403 charset = CHAR_CHARSET (c);
1404 ASET (charsets, CHARSET_ID (charset), Qt);
1405 }
1406 }
1407 }
1408
1409 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1410 2, 3, 0,
1411 doc: /* Return a list of charsets in the region between BEG and END.
1412 BEG and END are buffer positions.
1413 Optional arg TABLE if non-nil is a translation table to look up.
1414
1415 If the current buffer is unibyte, the returned list may contain
1416 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1417 (beg, end, table)
1418 Lisp_Object beg, end, table;
1419 {
1420 Lisp_Object charsets;
1421 EMACS_INT from, from_byte, to, stop, stop_byte;
1422 int i;
1423 Lisp_Object val;
1424 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1425
1426 validate_region (&beg, &end);
1427 from = XFASTINT (beg);
1428 stop = to = XFASTINT (end);
1429
1430 if (from < GPT && GPT < to)
1431 {
1432 stop = GPT;
1433 stop_byte = GPT_BYTE;
1434 }
1435 else
1436 stop_byte = CHAR_TO_BYTE (stop);
1437
1438 from_byte = CHAR_TO_BYTE (from);
1439
1440 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1441 while (1)
1442 {
1443 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1444 stop_byte - from_byte, charsets, table,
1445 multibyte);
1446 if (stop < to)
1447 {
1448 from = stop, from_byte = stop_byte;
1449 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1450 }
1451 else
1452 break;
1453 }
1454
1455 val = Qnil;
1456 for (i = charset_table_used - 1; i >= 0; i--)
1457 if (!NILP (AREF (charsets, i)))
1458 val = Fcons (CHARSET_NAME (charset_table + i), val);
1459 return val;
1460 }
1461
1462 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1463 1, 2, 0,
1464 doc: /* Return a list of charsets in STR.
1465 Optional arg TABLE if non-nil is a translation table to look up.
1466
1467 If STR is unibyte, the returned list may contain
1468 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1469 (str, table)
1470 Lisp_Object str, table;
1471 {
1472 Lisp_Object charsets;
1473 int i;
1474 Lisp_Object val;
1475
1476 CHECK_STRING (str);
1477
1478 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1479 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1480 charsets, table,
1481 STRING_MULTIBYTE (str));
1482 val = Qnil;
1483 for (i = charset_table_used - 1; i >= 0; i--)
1484 if (!NILP (AREF (charsets, i)))
1485 val = Fcons (CHARSET_NAME (charset_table + i), val);
1486 return val;
1487 }
1488
1489 \f
1490
1491 /* Return a character correponding to the code-point CODE of
1492 CHARSET. */
1493
1494 int
1495 decode_char (charset, code)
1496 struct charset *charset;
1497 unsigned code;
1498 {
1499 int c, char_index;
1500 enum charset_method method = CHARSET_METHOD (charset);
1501
1502 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1503 return -1;
1504
1505 if (method == CHARSET_METHOD_MAP_DEFERRED)
1506 {
1507 load_charset (charset);
1508 method = CHARSET_METHOD (charset);
1509 }
1510
1511 if (method == CHARSET_METHOD_SUBSET)
1512 {
1513 Lisp_Object subset_info;
1514
1515 subset_info = CHARSET_SUBSET (charset);
1516 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1517 code -= XINT (AREF (subset_info, 3));
1518 if (code < XFASTINT (AREF (subset_info, 1))
1519 || code > XFASTINT (AREF (subset_info, 2)))
1520 c = -1;
1521 else
1522 c = DECODE_CHAR (charset, code);
1523 }
1524 else if (method == CHARSET_METHOD_SUPERSET)
1525 {
1526 Lisp_Object parents;
1527
1528 parents = CHARSET_SUPERSET (charset);
1529 c = -1;
1530 for (; CONSP (parents); parents = XCDR (parents))
1531 {
1532 int id = XINT (XCAR (XCAR (parents)));
1533 int code_offset = XINT (XCDR (XCAR (parents)));
1534 unsigned this_code = code - code_offset;
1535
1536 charset = CHARSET_FROM_ID (id);
1537 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1538 break;
1539 }
1540 }
1541 else
1542 {
1543 char_index = CODE_POINT_TO_INDEX (charset, code);
1544 if (char_index < 0)
1545 return -1;
1546
1547 if (method == CHARSET_METHOD_MAP)
1548 {
1549 Lisp_Object decoder;
1550
1551 decoder = CHARSET_DECODER (charset);
1552 if (! VECTORP (decoder))
1553 return -1;
1554 c = XINT (AREF (decoder, char_index));
1555 }
1556 else
1557 {
1558 c = char_index + CHARSET_CODE_OFFSET (charset);
1559 }
1560 }
1561
1562 if (CHARSET_UNIFIED_P (charset)
1563 && c >= 0)
1564 {
1565 MAYBE_UNIFY_CHAR (c);
1566 }
1567
1568 return c;
1569 }
1570
1571 /* Variable used temporarily by the macro ENCODE_CHAR. */
1572 Lisp_Object charset_work;
1573
1574 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1575 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1576 use CHARSET's strict_max_char instead of max_char. */
1577
1578 unsigned
1579 encode_char (charset, c)
1580 struct charset *charset;
1581 int c;
1582 {
1583 unsigned code;
1584 enum charset_method method = CHARSET_METHOD (charset);
1585
1586 if (CHARSET_UNIFIED_P (charset))
1587 {
1588 Lisp_Object deunifier, deunified;
1589
1590 deunifier = CHARSET_DEUNIFIER (charset);
1591 if (! CHAR_TABLE_P (deunifier))
1592 {
1593 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
1594 deunifier = CHARSET_DEUNIFIER (charset);
1595 }
1596 deunified = CHAR_TABLE_REF (deunifier, c);
1597 if (! NILP (deunified))
1598 c = XINT (deunified);
1599 }
1600
1601 if (method == CHARSET_METHOD_SUBSET)
1602 {
1603 Lisp_Object subset_info;
1604 struct charset *this_charset;
1605
1606 subset_info = CHARSET_SUBSET (charset);
1607 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1608 code = ENCODE_CHAR (this_charset, c);
1609 if (code == CHARSET_INVALID_CODE (this_charset)
1610 || code < XFASTINT (AREF (subset_info, 1))
1611 || code > XFASTINT (AREF (subset_info, 2)))
1612 return CHARSET_INVALID_CODE (charset);
1613 code += XINT (AREF (subset_info, 3));
1614 return code;
1615 }
1616
1617 if (method == CHARSET_METHOD_SUPERSET)
1618 {
1619 Lisp_Object parents;
1620
1621 parents = CHARSET_SUPERSET (charset);
1622 for (; CONSP (parents); parents = XCDR (parents))
1623 {
1624 int id = XINT (XCAR (XCAR (parents)));
1625 int code_offset = XINT (XCDR (XCAR (parents)));
1626 struct charset *this_charset = CHARSET_FROM_ID (id);
1627
1628 code = ENCODE_CHAR (this_charset, c);
1629 if (code != CHARSET_INVALID_CODE (this_charset))
1630 return code + code_offset;
1631 }
1632 return CHARSET_INVALID_CODE (charset);
1633 }
1634
1635 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1636 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1637 return CHARSET_INVALID_CODE (charset);
1638
1639 if (method == CHARSET_METHOD_MAP_DEFERRED)
1640 {
1641 load_charset (charset);
1642 method = CHARSET_METHOD (charset);
1643 }
1644
1645 if (method == CHARSET_METHOD_MAP)
1646 {
1647 Lisp_Object encoder;
1648 Lisp_Object val;
1649
1650 encoder = CHARSET_ENCODER (charset);
1651 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1652 return CHARSET_INVALID_CODE (charset);
1653 val = CHAR_TABLE_REF (encoder, c);
1654 if (NILP (val))
1655 return CHARSET_INVALID_CODE (charset);
1656 code = XINT (val);
1657 if (! CHARSET_COMPACT_CODES_P (charset))
1658 code = INDEX_TO_CODE_POINT (charset, code);
1659 }
1660 else /* method == CHARSET_METHOD_OFFSET */
1661 {
1662 code = c - CHARSET_CODE_OFFSET (charset);
1663 code = INDEX_TO_CODE_POINT (charset, code);
1664 }
1665
1666 return code;
1667 }
1668
1669
1670 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1671 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1672 Return nil if CODE-POINT is not valid in CHARSET.
1673
1674 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1675
1676 Optional argument RESTRICTION specifies a way to map the pair of CCS
1677 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1678 (charset, code_point, restriction)
1679 Lisp_Object charset, code_point, restriction;
1680 {
1681 int c, id;
1682 unsigned code;
1683 struct charset *charsetp;
1684
1685 CHECK_CHARSET_GET_ID (charset, id);
1686 if (CONSP (code_point))
1687 {
1688 CHECK_NATNUM_CAR (code_point);
1689 CHECK_NATNUM_CDR (code_point);
1690 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1691 }
1692 else
1693 {
1694 CHECK_NATNUM (code_point);
1695 code = XINT (code_point);
1696 }
1697 charsetp = CHARSET_FROM_ID (id);
1698 c = DECODE_CHAR (charsetp, code);
1699 return (c >= 0 ? make_number (c) : Qnil);
1700 }
1701
1702
1703 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1704 doc: /* Encode the character CH into a code-point of CHARSET.
1705 Return nil if CHARSET doesn't include CH.
1706
1707 Optional argument RESTRICTION specifies a way to map CHAR to a
1708 code-point in CCS. Currently not supported and just ignored. */)
1709 (ch, charset, restriction)
1710 Lisp_Object ch, charset, restriction;
1711 {
1712 int id;
1713 unsigned code;
1714 struct charset *charsetp;
1715
1716 CHECK_CHARSET_GET_ID (charset, id);
1717 CHECK_NATNUM (ch);
1718 charsetp = CHARSET_FROM_ID (id);
1719 code = ENCODE_CHAR (charsetp, XINT (ch));
1720 if (code == CHARSET_INVALID_CODE (charsetp))
1721 return Qnil;
1722 if (code > 0x7FFFFFF)
1723 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1724 return make_number (code);
1725 }
1726
1727
1728 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1729 doc:
1730 /* Return a character of CHARSET whose position codes are CODEn.
1731
1732 CODE1 through CODE4 are optional, but if you don't supply sufficient
1733 position codes, it is assumed that the minimum code in each dimension
1734 is specified. */)
1735 (charset, code1, code2, code3, code4)
1736 Lisp_Object charset, code1, code2, code3, code4;
1737 {
1738 int id, dimension;
1739 struct charset *charsetp;
1740 unsigned code;
1741 int c;
1742
1743 CHECK_CHARSET_GET_ID (charset, id);
1744 charsetp = CHARSET_FROM_ID (id);
1745
1746 dimension = CHARSET_DIMENSION (charsetp);
1747 if (NILP (code1))
1748 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1749 ? 0 : CHARSET_MIN_CODE (charsetp));
1750 else
1751 {
1752 CHECK_NATNUM (code1);
1753 if (XFASTINT (code1) >= 0x100)
1754 args_out_of_range (make_number (0xFF), code1);
1755 code = XFASTINT (code1);
1756
1757 if (dimension > 1)
1758 {
1759 code <<= 8;
1760 if (NILP (code2))
1761 code |= charsetp->code_space[(dimension - 2) * 4];
1762 else
1763 {
1764 CHECK_NATNUM (code2);
1765 if (XFASTINT (code2) >= 0x100)
1766 args_out_of_range (make_number (0xFF), code2);
1767 code |= XFASTINT (code2);
1768 }
1769
1770 if (dimension > 2)
1771 {
1772 code <<= 8;
1773 if (NILP (code3))
1774 code |= charsetp->code_space[(dimension - 3) * 4];
1775 else
1776 {
1777 CHECK_NATNUM (code3);
1778 if (XFASTINT (code3) >= 0x100)
1779 args_out_of_range (make_number (0xFF), code3);
1780 code |= XFASTINT (code3);
1781 }
1782
1783 if (dimension > 3)
1784 {
1785 code <<= 8;
1786 if (NILP (code4))
1787 code |= charsetp->code_space[0];
1788 else
1789 {
1790 CHECK_NATNUM (code4);
1791 if (XFASTINT (code4) >= 0x100)
1792 args_out_of_range (make_number (0xFF), code4);
1793 code |= XFASTINT (code4);
1794 }
1795 }
1796 }
1797 }
1798 }
1799
1800 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1801 code &= 0x7F7F7F7F;
1802 c = DECODE_CHAR (charsetp, code);
1803 if (c < 0)
1804 error ("Invalid code(s)");
1805 return make_number (c);
1806 }
1807
1808
1809 /* Return the first charset in CHARSET_LIST that contains C.
1810 CHARSET_LIST is a list of charset IDs. If it is nil, use
1811 Vcharset_ordered_list. */
1812
1813 struct charset *
1814 char_charset (c, charset_list, code_return)
1815 int c;
1816 Lisp_Object charset_list;
1817 unsigned *code_return;
1818 {
1819 if (NILP (charset_list))
1820 charset_list = Vcharset_ordered_list;
1821
1822 while (CONSP (charset_list))
1823 {
1824 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1825 unsigned code = ENCODE_CHAR (charset, c);
1826
1827 if (code != CHARSET_INVALID_CODE (charset))
1828 {
1829 if (code_return)
1830 *code_return = code;
1831 return charset;
1832 }
1833 charset_list = XCDR (charset_list);
1834 }
1835 return NULL;
1836 }
1837
1838
1839 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1840 doc:
1841 /*Return list of charset and one to four position-codes of CHAR.
1842 The charset is decided by the current priority order of charsets.
1843 A position-code is a byte value of each dimension of the code-point of
1844 CHAR in the charset. */)
1845 (ch)
1846 Lisp_Object ch;
1847 {
1848 struct charset *charset;
1849 int c, dimension;
1850 unsigned code;
1851 Lisp_Object val;
1852
1853 CHECK_CHARACTER (ch);
1854 c = XFASTINT (ch);
1855 charset = CHAR_CHARSET (c);
1856 if (! charset)
1857 abort ();
1858 code = ENCODE_CHAR (charset, c);
1859 if (code == CHARSET_INVALID_CODE (charset))
1860 abort ();
1861 dimension = CHARSET_DIMENSION (charset);
1862 for (val = Qnil; dimension > 0; dimension--)
1863 {
1864 val = Fcons (make_number (code & 0xFF), val);
1865 code >>= 8;
1866 }
1867 return Fcons (CHARSET_NAME (charset), val);
1868 }
1869
1870
1871 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1872 doc: /* Return the charset of highest priority that contains CH. */)
1873 (ch)
1874 Lisp_Object ch;
1875 {
1876 struct charset *charset;
1877
1878 CHECK_CHARACTER (ch);
1879 charset = CHAR_CHARSET (XINT (ch));
1880 return (CHARSET_NAME (charset));
1881 }
1882
1883
1884 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1885 doc: /*
1886 Return charset of a character in the current buffer at position POS.
1887 If POS is nil, it defauls to the current point.
1888 If POS is out of range, the value is nil. */)
1889 (pos)
1890 Lisp_Object pos;
1891 {
1892 Lisp_Object ch;
1893 struct charset *charset;
1894
1895 ch = Fchar_after (pos);
1896 if (! INTEGERP (ch))
1897 return ch;
1898 charset = CHAR_CHARSET (XINT (ch));
1899 return (CHARSET_NAME (charset));
1900 }
1901
1902
1903 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1904 doc: /*
1905 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1906
1907 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1908 by their DIMENSION, CHARS, and FINAL-CHAR,
1909 where as Emacs distinguishes them by charset symbol.
1910 See the documentation of the function `charset-info' for the meanings of
1911 DIMENSION, CHARS, and FINAL-CHAR. */)
1912 (dimension, chars, final_char)
1913 Lisp_Object dimension, chars, final_char;
1914 {
1915 int id;
1916 int chars_flag;
1917
1918 check_iso_charset_parameter (dimension, chars, final_char);
1919 chars_flag = XFASTINT (chars) == 96;
1920 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
1921 XFASTINT (final_char));
1922 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
1923 }
1924
1925
1926 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1927 0, 0, 0,
1928 doc: /*
1929 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1930 ()
1931 {
1932 int i;
1933 struct charset *charset;
1934 Lisp_Object attrs;
1935
1936 for (i = 0; i < charset_table_used; i++)
1937 {
1938 charset = CHARSET_FROM_ID (i);
1939 attrs = CHARSET_ATTRIBUTES (charset);
1940
1941 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1942 {
1943 CHARSET_ATTR_DECODER (attrs) = Qnil;
1944 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1945 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1946 }
1947
1948 if (CHARSET_UNIFIED_P (charset))
1949 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1950 }
1951
1952 if (CHAR_TABLE_P (Vchar_unified_charset_table))
1953 {
1954 Foptimize_char_table (Vchar_unified_charset_table);
1955 Vchar_unify_table = Vchar_unified_charset_table;
1956 Vchar_unified_charset_table = Qnil;
1957 }
1958
1959 return Qnil;
1960 }
1961
1962 DEFUN ("charset-priority-list", Fcharset_priority_list,
1963 Scharset_priority_list, 0, 1, 0,
1964 doc: /* Return the list of charsets ordered by priority.
1965 HIGHESTP non-nil means just return the highest priority one. */)
1966 (highestp)
1967 Lisp_Object highestp;
1968 {
1969 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1970
1971 if (!NILP (highestp))
1972 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
1973
1974 while (!NILP (list))
1975 {
1976 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
1977 list = XCDR (list);
1978 }
1979 return Fnreverse (val);
1980 }
1981
1982 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1983 1, MANY, 0,
1984 doc: /* Assign higher priority to the charsets given as arguments.
1985 usage: (set-charset-priority &rest charsets) */)
1986 (nargs, args)
1987 int nargs;
1988 Lisp_Object *args;
1989 {
1990 Lisp_Object new_head, old_list, arglist[2];
1991 Lisp_Object list_2022, list_emacs_mule;
1992 int i, id;
1993
1994 old_list = Fcopy_sequence (Vcharset_ordered_list);
1995 new_head = Qnil;
1996 for (i = 0; i < nargs; i++)
1997 {
1998 CHECK_CHARSET_GET_ID (args[i], id);
1999 if (! NILP (Fmemq (make_number (id), old_list)))
2000 {
2001 old_list = Fdelq (make_number (id), old_list);
2002 new_head = Fcons (make_number (id), new_head);
2003 }
2004 }
2005 arglist[0] = Fnreverse (new_head);
2006 arglist[1] = old_list;
2007 Vcharset_ordered_list = Fnconc (2, arglist);
2008 charset_ordered_list_tick++;
2009
2010 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2011 CONSP (old_list); old_list = XCDR (old_list))
2012 {
2013 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2014 list_2022 = Fcons (XCAR (old_list), list_2022);
2015 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2016 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2017 }
2018 Viso_2022_charset_list = Fnreverse (list_2022);
2019 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2020
2021 return Qnil;
2022 }
2023
2024 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2025 0, 1, 0,
2026 doc: /* Internal use only.
2027 Return charset identification number of CHARSET. */)
2028 (charset)
2029 Lisp_Object charset;
2030 {
2031 int id;
2032
2033 CHECK_CHARSET_GET_ID (charset, id);
2034 return make_number (id);
2035 }
2036
2037 \f
2038 void
2039 init_charset ()
2040 {
2041 Vcharset_map_path
2042 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
2043 Qnil);
2044 }
2045
2046
2047 void
2048 init_charset_once ()
2049 {
2050 int i, j, k;
2051
2052 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2053 for (j = 0; j < ISO_MAX_CHARS; j++)
2054 for (k = 0; k < ISO_MAX_FINAL; k++)
2055 iso_charset_table[i][j][k] = -1;
2056
2057 for (i = 0; i < 256; i++)
2058 emacs_mule_charset[i] = NULL;
2059
2060 charset_jisx0201_roman = -1;
2061 charset_jisx0208_1978 = -1;
2062 charset_jisx0208 = -1;
2063
2064 for (i = 0; i < 128; i++)
2065 unibyte_to_multibyte_table[i] = i;
2066 for (; i < 256; i++)
2067 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2068 }
2069
2070 #ifdef emacs
2071
2072 void
2073 syms_of_charset ()
2074 {
2075 DEFSYM (Qcharsetp, "charsetp");
2076
2077 DEFSYM (Qascii, "ascii");
2078 DEFSYM (Qunicode, "unicode");
2079 DEFSYM (Qeight_bit, "eight-bit");
2080 DEFSYM (Qiso_8859_1, "iso-8859-1");
2081
2082 DEFSYM (Qgl, "gl");
2083 DEFSYM (Qgr, "gr");
2084
2085 staticpro (&Vcharset_ordered_list);
2086 Vcharset_ordered_list = Qnil;
2087
2088 staticpro (&Viso_2022_charset_list);
2089 Viso_2022_charset_list = Qnil;
2090
2091 staticpro (&Vemacs_mule_charset_list);
2092 Vemacs_mule_charset_list = Qnil;
2093
2094 staticpro (&Vcharset_hash_table);
2095 {
2096 Lisp_Object args[2];
2097 args[0] = QCtest;
2098 args[1] = Qeq;
2099 Vcharset_hash_table = Fmake_hash_table (2, args);
2100 }
2101
2102 charset_table_size = 128;
2103 charset_table = ((struct charset *)
2104 xmalloc (sizeof (struct charset) * charset_table_size));
2105 charset_table_used = 0;
2106
2107 staticpro (&Vchar_unified_charset_table);
2108 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2109
2110 defsubr (&Scharsetp);
2111 defsubr (&Smap_charset_chars);
2112 defsubr (&Sdefine_charset_internal);
2113 defsubr (&Sdefine_charset_alias);
2114 defsubr (&Sunibyte_charset);
2115 defsubr (&Sset_unibyte_charset);
2116 defsubr (&Scharset_plist);
2117 defsubr (&Sset_charset_plist);
2118 defsubr (&Sunify_charset);
2119 defsubr (&Sget_unused_iso_final_char);
2120 defsubr (&Sdeclare_equiv_charset);
2121 defsubr (&Sfind_charset_region);
2122 defsubr (&Sfind_charset_string);
2123 defsubr (&Sdecode_char);
2124 defsubr (&Sencode_char);
2125 defsubr (&Ssplit_char);
2126 defsubr (&Smake_char);
2127 defsubr (&Schar_charset);
2128 defsubr (&Scharset_after);
2129 defsubr (&Siso_charset);
2130 defsubr (&Sclear_charset_maps);
2131 defsubr (&Scharset_priority_list);
2132 defsubr (&Sset_charset_priority);
2133 defsubr (&Scharset_id_internal);
2134
2135 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2136 doc: /* *Lisp of directories to search for charset map files. */);
2137 Vcharset_map_path = Qnil;
2138
2139 DEFVAR_LISP ("charset-list", &Vcharset_list,
2140 doc: /* List of all charsets ever defined. */);
2141 Vcharset_list = Qnil;
2142
2143 charset_ascii
2144 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2145 0, 127, 'B', -1, 0, 1, 0, 0);
2146 charset_iso_8859_1
2147 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2148 0, 255, -1, -1, -1, 1, 0, 0);
2149 charset_unicode
2150 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2151 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2152 charset_eight_bit
2153 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2154 128, 255, -1, 0, -1, 0, 0,
2155 MAX_5_BYTE_CHAR + 1);
2156 }
2157
2158 #endif /* emacs */
2159
2160 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2161 (do not change this comment) */