* charset.c (load_charset_map_from_file)
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
60
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
64
65 /* Table of struct charset. */
66 struct charset *charset_table;
67
68 static int charset_table_size;
69 static int charset_table_used;
70
71 Lisp_Object Qcharsetp;
72
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
86
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
92
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
95
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
98
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
101
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
105
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
110
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
113
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
116
117 struct charset *emacs_mule_charset[256];
118
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
123 Lisp_Object Vcharset_map_path;
124
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
127
128 Lisp_Object Vcurrent_iso639_language;
129
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to));
136
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
153
154
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
171
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
174
175 static struct
176 {
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
206
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
229 \f
230
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
234
235 struct charset_map_entries
236 {
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242 };
243
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
253
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
259
260 If CONTROL_FLAG is 2, setup the following tables.
261
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 */
268
269 static void
270 load_charset_map (charset, entries, n_entries, control_flag)
271 struct charset *charset;
272 struct charset_map_entries *entries;
273 int n_entries;
274 int control_flag;
275 {
276 Lisp_Object vec, table;
277 unsigned max_code = CHARSET_MAX_CODE (charset);
278 int ascii_compatible_p = charset->ascii_compatible_p;
279 int min_char, max_char, nonascii_min_char;
280 int i;
281 unsigned char *fast_map = charset->fast_map;
282
283 if (n_entries <= 0)
284 return;
285
286 if (control_flag)
287 {
288 if (! inhibit_load_charset_map)
289 {
290 if (control_flag == 1)
291 {
292 if (charset->method == CHARSET_METHOD_MAP)
293 {
294 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
295
296 vec = CHARSET_DECODER (charset)
297 = Fmake_vector (make_number (n), make_number (-1));
298 }
299 else
300 {
301 char_table_set_range (Vchar_unify_table,
302 charset->min_char, charset->max_char,
303 Qnil);
304 }
305 }
306 else
307 {
308 table = Fmake_char_table (Qnil, Qnil);
309 if (charset->method == CHARSET_METHOD_MAP)
310 CHARSET_ENCODER (charset) = table;
311 else
312 CHARSET_DEUNIFIER (charset) = table;
313 }
314 }
315 else
316 {
317 if (! temp_charset_work)
318 temp_charset_work = malloc (sizeof (*temp_charset_work));
319 if (control_flag == 1)
320 {
321 memset (temp_charset_work->table.decoder, -1,
322 sizeof (int) * 0x10000);
323 }
324 else
325 {
326 memset (temp_charset_work->table.encoder, 0,
327 sizeof (unsigned short) * 0x20000);
328 temp_charset_work->zero_index_char = -1;
329 }
330 temp_charset_work->current = charset;
331 temp_charset_work->for_encoder = (control_flag == 2);
332 control_flag += 2;
333 }
334 charset_map_loaded = 1;
335 }
336
337 min_char = max_char = entries->entry[0].c;
338 nonascii_min_char = MAX_CHAR;
339 for (i = 0; i < n_entries; i++)
340 {
341 unsigned from, to;
342 int from_index, to_index;
343 int from_c, to_c;
344 int idx = i % 0x10000;
345
346 if (i > 0 && idx == 0)
347 entries = entries->next;
348 from = entries->entry[idx].from;
349 to = entries->entry[idx].to;
350 from_c = entries->entry[idx].c;
351 from_index = CODE_POINT_TO_INDEX (charset, from);
352 if (from == to)
353 {
354 to_index = from_index;
355 to_c = from_c;
356 }
357 else
358 {
359 to_index = CODE_POINT_TO_INDEX (charset, to);
360 to_c = from_c + (to_index - from_index);
361 }
362 if (from_index < 0 || to_index < 0)
363 continue;
364
365 if (to_c > max_char)
366 max_char = to_c;
367 else if (from_c < min_char)
368 min_char = from_c;
369
370 if (control_flag == 1)
371 {
372 if (charset->method == CHARSET_METHOD_MAP)
373 for (; from_index <= to_index; from_index++, from_c++)
374 ASET (vec, from_index, make_number (from_c));
375 else
376 for (; from_index <= to_index; from_index++, from_c++)
377 CHAR_TABLE_SET (Vchar_unify_table,
378 CHARSET_CODE_OFFSET (charset) + from_index,
379 make_number (from_c));
380 }
381 else if (control_flag == 2)
382 {
383 if (charset->method == CHARSET_METHOD_MAP
384 && CHARSET_COMPACT_CODES_P (charset))
385 for (; from_index <= to_index; from_index++, from_c++)
386 {
387 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
388
389 if (NILP (CHAR_TABLE_REF (table, from_c)))
390 CHAR_TABLE_SET (table, from_c, make_number (code));
391 }
392 else
393 for (; from_index <= to_index; from_index++, from_c++)
394 {
395 if (NILP (CHAR_TABLE_REF (table, from_c)))
396 CHAR_TABLE_SET (table, from_c, make_number (from_index));
397 }
398 }
399 else if (control_flag == 3)
400 for (; from_index <= to_index; from_index++, from_c++)
401 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
402 else if (control_flag == 4)
403 for (; from_index <= to_index; from_index++, from_c++)
404 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
405 else /* control_flag == 0 */
406 {
407 if (ascii_compatible_p)
408 {
409 if (! ASCII_BYTE_P (from_c))
410 {
411 if (from_c < nonascii_min_char)
412 nonascii_min_char = from_c;
413 }
414 else if (! ASCII_BYTE_P (to_c))
415 {
416 nonascii_min_char = 0x80;
417 }
418 }
419
420 for (; from_c <= to_c; from_c++)
421 CHARSET_FAST_MAP_SET (from_c, fast_map);
422 }
423 }
424
425 if (control_flag == 0)
426 {
427 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
428 ? nonascii_min_char : min_char);
429 CHARSET_MAX_CHAR (charset) = max_char;
430 }
431 else if (control_flag == 4)
432 {
433 temp_charset_work->min_char = min_char;
434 temp_charset_work->max_char = max_char;
435 }
436 }
437
438
439 /* Read a hexadecimal number (preceded by "0x") from the file FP while
440 paying attention to comment charcter '#'. */
441
442 static INLINE unsigned
443 read_hex (fp, eof)
444 FILE *fp;
445 int *eof;
446 {
447 int c;
448 unsigned n;
449
450 while ((c = getc (fp)) != EOF)
451 {
452 if (c == '#')
453 {
454 while ((c = getc (fp)) != EOF && c != '\n');
455 }
456 else if (c == '0')
457 {
458 if ((c = getc (fp)) == EOF || c == 'x')
459 break;
460 }
461 }
462 if (c == EOF)
463 {
464 *eof = 1;
465 return 0;
466 }
467 *eof = 0;
468 n = 0;
469 if (c == 'x')
470 while ((c = getc (fp)) != EOF && isxdigit (c))
471 n = ((n << 4)
472 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
473 else
474 while ((c = getc (fp)) != EOF && isdigit (c))
475 n = (n * 10) + c - '0';
476 if (c != EOF)
477 ungetc (c, fp);
478 return n;
479 }
480
481 extern Lisp_Object Qfile_name_handler_alist;
482
483 /* Return a mapping vector for CHARSET loaded from MAPFILE.
484 Each line of MAPFILE has this form
485 0xAAAA 0xCCCC
486 where 0xAAAA is a code-point and 0xCCCC is the corresponding
487 character code, or this form
488 0xAAAA-0xBBBB 0xCCCC
489 where 0xAAAA and 0xBBBB are code-points specifying a range, and
490 0xCCCC is the first character code of the range.
491
492 The returned vector has this form:
493 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
494 where CODE1 is a code-point or a cons of code-points specifying a
495 range.
496
497 Note that this function uses `openp' to open MAPFILE but ignores
498 `file-name-handler-alist' to avoid running any Lisp code. */
499
500 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
501
502 static void
503 load_charset_map_from_file (charset, mapfile, control_flag)
504 struct charset *charset;
505 Lisp_Object mapfile;
506 int control_flag;
507 {
508 unsigned min_code = CHARSET_MIN_CODE (charset);
509 unsigned max_code = CHARSET_MAX_CODE (charset);
510 int fd;
511 FILE *fp;
512 int eof;
513 Lisp_Object suffixes;
514 struct charset_map_entries *head, *entries;
515 int n_entries, count;
516 USE_SAFE_ALLOCA;
517
518 suffixes = Fcons (build_string (".map"),
519 Fcons (build_string (".TXT"), Qnil));
520
521 count = SPECPDL_INDEX ();
522 specbind (Qfile_name_handler_alist, Qnil);
523 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
524 unbind_to (count, Qnil);
525 if (fd < 0
526 || ! (fp = fdopen (fd, "r")))
527 error ("Failure in loading charset map: %S", SDATA (mapfile));
528
529 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
530 large (larger than MAX_ALLOCA). */
531 SAFE_ALLOCA (head, struct charset_map_entries *,
532 sizeof (struct charset_map_entries));
533 entries = head;
534
535 n_entries = 0;
536 eof = 0;
537 while (1)
538 {
539 unsigned from, to;
540 int c;
541 int idx;
542
543 from = read_hex (fp, &eof);
544 if (eof)
545 break;
546 if (getc (fp) == '-')
547 to = read_hex (fp, &eof);
548 else
549 to = from;
550 c = (int) read_hex (fp, &eof);
551
552 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
553 continue;
554
555 if (n_entries > 0 && (n_entries % 0x10000) == 0)
556 {
557 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
558 sizeof (struct charset_map_entries));
559 entries = entries->next;
560 }
561 idx = n_entries % 0x10000;
562 entries->entry[idx].from = from;
563 entries->entry[idx].to = to;
564 entries->entry[idx].c = c;
565 n_entries++;
566 }
567 fclose (fp);
568 close (fd);
569
570 load_charset_map (charset, head, n_entries, control_flag);
571 SAFE_FREE ();
572 }
573
574 static void
575 load_charset_map_from_vector (charset, vec, control_flag)
576 struct charset *charset;
577 Lisp_Object vec;
578 int control_flag;
579 {
580 unsigned min_code = CHARSET_MIN_CODE (charset);
581 unsigned max_code = CHARSET_MAX_CODE (charset);
582 struct charset_map_entries *head, *entries;
583 int n_entries;
584 int len = ASIZE (vec);
585 int i;
586 USE_SAFE_ALLOCA;
587
588 if (len % 2 == 1)
589 {
590 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
591 return;
592 }
593
594 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
595 large (larger than MAX_ALLOCA). */
596 SAFE_ALLOCA (head, struct charset_map_entries *,
597 sizeof (struct charset_map_entries));
598 entries = head;
599
600 n_entries = 0;
601 for (i = 0; i < len; i += 2)
602 {
603 Lisp_Object val, val2;
604 unsigned from, to;
605 int c;
606 int idx;
607
608 val = AREF (vec, i);
609 if (CONSP (val))
610 {
611 val2 = XCDR (val);
612 val = XCAR (val);
613 CHECK_NATNUM (val);
614 CHECK_NATNUM (val2);
615 from = XFASTINT (val);
616 to = XFASTINT (val2);
617 }
618 else
619 {
620 CHECK_NATNUM (val);
621 from = to = XFASTINT (val);
622 }
623 val = AREF (vec, i + 1);
624 CHECK_NATNUM (val);
625 c = XFASTINT (val);
626
627 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
628 continue;
629
630 if (n_entries > 0 && (n_entries % 0x10000) == 0)
631 {
632 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
633 sizeof (struct charset_map_entries));
634 entries = entries->next;
635 }
636 idx = n_entries % 0x10000;
637 entries->entry[idx].from = from;
638 entries->entry[idx].to = to;
639 entries->entry[idx].c = c;
640 n_entries++;
641 }
642
643 load_charset_map (charset, head, n_entries, control_flag);
644 SAFE_FREE ();
645 }
646
647
648 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
649 map it is (see the comment of load_charset_map for the detail). */
650
651 static void
652 load_charset (charset, control_flag)
653 struct charset *charset;
654 int control_flag;
655 {
656 Lisp_Object map;
657
658 if (inhibit_load_charset_map
659 && temp_charset_work
660 && charset == temp_charset_work->current
661 && ((control_flag == 2) == temp_charset_work->for_encoder))
662 return;
663
664 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
665 map = CHARSET_MAP (charset);
666 else if (CHARSET_UNIFIED_P (charset))
667 map = CHARSET_UNIFY_MAP (charset);
668 if (STRINGP (map))
669 load_charset_map_from_file (charset, map, control_flag);
670 else
671 load_charset_map_from_vector (charset, map, control_flag);
672 }
673
674
675 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
676 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
677 (object)
678 Lisp_Object object;
679 {
680 return (CHARSETP (object) ? Qt : Qnil);
681 }
682
683
684 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
685 Lisp_Object function, Lisp_Object arg,
686 unsigned from, unsigned to));
687
688 void
689 map_charset_for_dump (c_function, function, arg, from, to)
690 void (*c_function) (Lisp_Object, Lisp_Object);
691 Lisp_Object function, arg;
692 unsigned from, to;
693 {
694 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
695 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
696 Lisp_Object range;
697 int c, stop;
698 struct gcpro gcpro1;
699
700 range = Fcons (Qnil, Qnil);
701 GCPRO1 (range);
702
703 c = temp_charset_work->min_char;
704 stop = (temp_charset_work->max_char < 0x20000
705 ? temp_charset_work->max_char : 0xFFFF);
706
707 while (1)
708 {
709 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
710
711 if (index >= from_idx && index <= to_idx)
712 {
713 if (NILP (XCAR (range)))
714 XSETCAR (range, make_number (c));
715 }
716 else if (! NILP (XCAR (range)))
717 {
718 XSETCDR (range, make_number (c - 1));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
723 XSETCAR (range, Qnil);
724 }
725 if (c == stop)
726 {
727 if (c == temp_charset_work->max_char)
728 {
729 if (! NILP (XCAR (range)))
730 {
731 XSETCDR (range, make_number (c));
732 if (c_function)
733 (*c_function) (arg, range);
734 else
735 call2 (function, range, arg);
736 }
737 break;
738 }
739 c = 0x1FFFF;
740 stop = temp_charset_work->max_char;
741 }
742 c++;
743 }
744 UNGCPRO;
745 }
746
747 void
748 map_charset_chars (c_function, function, arg,
749 charset, from, to)
750 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
751 Lisp_Object function, arg;
752 struct charset *charset;
753 unsigned from, to;
754 {
755 Lisp_Object range;
756 int partial;
757
758 partial = (from > CHARSET_MIN_CODE (charset)
759 || to < CHARSET_MAX_CODE (charset));
760
761 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
762 {
763 int from_idx = CODE_POINT_TO_INDEX (charset, from);
764 int to_idx = CODE_POINT_TO_INDEX (charset, to);
765 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
766 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
767
768 if (CHARSET_UNIFIED_P (charset))
769 {
770 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
771 load_charset (charset, 2);
772 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
773 map_char_table_for_charset (c_function, function,
774 CHARSET_DEUNIFIER (charset), arg,
775 partial ? charset : NULL, from, to);
776 else
777 map_charset_for_dump (c_function, function, arg, from, to);
778 }
779
780 range = Fcons (make_number (from_c), make_number (to_c));
781 if (NILP (function))
782 (*c_function) (arg, range);
783 else
784 call2 (function, range, arg);
785 }
786 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
787 {
788 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
789 load_charset (charset, 2);
790 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
791 map_char_table_for_charset (c_function, function,
792 CHARSET_ENCODER (charset), arg,
793 partial ? charset : NULL, from, to);
794 else
795 map_charset_for_dump (c_function, function, arg, from, to);
796 }
797 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
798 {
799 Lisp_Object subset_info;
800 int offset;
801
802 subset_info = CHARSET_SUBSET (charset);
803 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
804 offset = XINT (AREF (subset_info, 3));
805 from -= offset;
806 if (from < XFASTINT (AREF (subset_info, 1)))
807 from = XFASTINT (AREF (subset_info, 1));
808 to -= offset;
809 if (to > XFASTINT (AREF (subset_info, 2)))
810 to = XFASTINT (AREF (subset_info, 2));
811 map_charset_chars (c_function, function, arg, charset, from, to);
812 }
813 else /* i.e. CHARSET_METHOD_SUPERSET */
814 {
815 Lisp_Object parents;
816
817 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
818 parents = XCDR (parents))
819 {
820 int offset;
821 unsigned this_from, this_to;
822
823 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
824 offset = XINT (XCDR (XCAR (parents)));
825 this_from = from > offset ? from - offset : 0;
826 this_to = to > offset ? to - offset : 0;
827 if (this_from < CHARSET_MIN_CODE (charset))
828 this_from = CHARSET_MIN_CODE (charset);
829 if (this_to > CHARSET_MAX_CODE (charset))
830 this_to = CHARSET_MAX_CODE (charset);
831 map_charset_chars (c_function, function, arg, charset,
832 this_from, this_to);
833 }
834 }
835 }
836
837 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
838 doc: /* Call FUNCTION for all characters in CHARSET.
839 FUNCTION is called with an argument RANGE and the optional 3rd
840 argument ARG.
841
842 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
843 characters contained in CHARSET.
844
845 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
846 range of code points (in CHARSET) of target characters. */)
847 (function, charset, arg, from_code, to_code)
848 Lisp_Object function, charset, arg, from_code, to_code;
849 {
850 struct charset *cs;
851 unsigned from, to;
852
853 CHECK_CHARSET_GET_CHARSET (charset, cs);
854 if (NILP (from_code))
855 from = CHARSET_MIN_CODE (cs);
856 else
857 {
858 CHECK_NATNUM (from_code);
859 from = XINT (from_code);
860 if (from < CHARSET_MIN_CODE (cs))
861 from = CHARSET_MIN_CODE (cs);
862 }
863 if (NILP (to_code))
864 to = CHARSET_MAX_CODE (cs);
865 else
866 {
867 CHECK_NATNUM (to_code);
868 to = XINT (to_code);
869 if (to > CHARSET_MAX_CODE (cs))
870 to = CHARSET_MAX_CODE (cs);
871 }
872 map_charset_chars (NULL, function, arg, cs, from, to);
873 return Qnil;
874 }
875
876
877 /* Define a charset according to the arguments. The Nth argument is
878 the Nth attribute of the charset (the last attribute `charset-id'
879 is not included). See the docstring of `define-charset' for the
880 detail. */
881
882 DEFUN ("define-charset-internal", Fdefine_charset_internal,
883 Sdefine_charset_internal, charset_arg_max, MANY, 0,
884 doc: /* For internal use only.
885 usage: (define-charset-internal ...) */)
886 (nargs, args)
887 int nargs;
888 Lisp_Object *args;
889 {
890 /* Charset attr vector. */
891 Lisp_Object attrs;
892 Lisp_Object val;
893 unsigned hash_code;
894 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
895 int i, j;
896 struct charset charset;
897 int id;
898 int dimension;
899 int new_definition_p;
900 int nchars;
901
902 if (nargs != charset_arg_max)
903 return Fsignal (Qwrong_number_of_arguments,
904 Fcons (intern ("define-charset-internal"),
905 make_number (nargs)));
906
907 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
908
909 CHECK_SYMBOL (args[charset_arg_name]);
910 ASET (attrs, charset_name, args[charset_arg_name]);
911
912 val = args[charset_arg_code_space];
913 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
914 {
915 int min_byte, max_byte;
916
917 min_byte = XINT (Faref (val, make_number (i * 2)));
918 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
919 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
920 error ("Invalid :code-space value");
921 charset.code_space[i * 4] = min_byte;
922 charset.code_space[i * 4 + 1] = max_byte;
923 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
924 nchars *= charset.code_space[i * 4 + 2];
925 charset.code_space[i * 4 + 3] = nchars;
926 if (max_byte > 0)
927 dimension = i + 1;
928 }
929
930 val = args[charset_arg_dimension];
931 if (NILP (val))
932 charset.dimension = dimension;
933 else
934 {
935 CHECK_NATNUM (val);
936 charset.dimension = XINT (val);
937 if (charset.dimension < 1 || charset.dimension > 4)
938 args_out_of_range_3 (val, make_number (1), make_number (4));
939 }
940
941 charset.code_linear_p
942 = (charset.dimension == 1
943 || (charset.code_space[2] == 256
944 && (charset.dimension == 2
945 || (charset.code_space[6] == 256
946 && (charset.dimension == 3
947 || charset.code_space[10] == 256)))));
948
949 if (! charset.code_linear_p)
950 {
951 charset.code_space_mask = (unsigned char *) xmalloc (256);
952 bzero (charset.code_space_mask, 256);
953 for (i = 0; i < 4; i++)
954 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
955 j++)
956 charset.code_space_mask[j] |= (1 << i);
957 }
958
959 charset.iso_chars_96 = charset.code_space[2] == 96;
960
961 charset.min_code = (charset.code_space[0]
962 | (charset.code_space[4] << 8)
963 | (charset.code_space[8] << 16)
964 | (charset.code_space[12] << 24));
965 charset.max_code = (charset.code_space[1]
966 | (charset.code_space[5] << 8)
967 | (charset.code_space[9] << 16)
968 | (charset.code_space[13] << 24));
969 charset.char_index_offset = 0;
970
971 val = args[charset_arg_min_code];
972 if (! NILP (val))
973 {
974 unsigned code;
975
976 if (INTEGERP (val))
977 code = XINT (val);
978 else
979 {
980 CHECK_CONS (val);
981 CHECK_NUMBER_CAR (val);
982 CHECK_NUMBER_CDR (val);
983 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
984 }
985 if (code < charset.min_code
986 || code > charset.max_code)
987 args_out_of_range_3 (make_number (charset.min_code),
988 make_number (charset.max_code), val);
989 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
990 charset.min_code = code;
991 }
992
993 val = args[charset_arg_max_code];
994 if (! NILP (val))
995 {
996 unsigned code;
997
998 if (INTEGERP (val))
999 code = XINT (val);
1000 else
1001 {
1002 CHECK_CONS (val);
1003 CHECK_NUMBER_CAR (val);
1004 CHECK_NUMBER_CDR (val);
1005 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1006 }
1007 if (code < charset.min_code
1008 || code > charset.max_code)
1009 args_out_of_range_3 (make_number (charset.min_code),
1010 make_number (charset.max_code), val);
1011 charset.max_code = code;
1012 }
1013
1014 charset.compact_codes_p = charset.max_code < 0x10000;
1015
1016 val = args[charset_arg_invalid_code];
1017 if (NILP (val))
1018 {
1019 if (charset.min_code > 0)
1020 charset.invalid_code = 0;
1021 else
1022 {
1023 XSETINT (val, charset.max_code + 1);
1024 if (XINT (val) == charset.max_code + 1)
1025 charset.invalid_code = charset.max_code + 1;
1026 else
1027 error ("Attribute :invalid-code must be specified");
1028 }
1029 }
1030 else
1031 {
1032 CHECK_NATNUM (val);
1033 charset.invalid_code = XFASTINT (val);
1034 }
1035
1036 val = args[charset_arg_iso_final];
1037 if (NILP (val))
1038 charset.iso_final = -1;
1039 else
1040 {
1041 CHECK_NUMBER (val);
1042 if (XINT (val) < '0' || XINT (val) > 127)
1043 error ("Invalid iso-final-char: %d", XINT (val));
1044 charset.iso_final = XINT (val);
1045 }
1046
1047 val = args[charset_arg_iso_revision];
1048 if (NILP (val))
1049 charset.iso_revision = -1;
1050 else
1051 {
1052 CHECK_NUMBER (val);
1053 if (XINT (val) > 63)
1054 args_out_of_range (make_number (63), val);
1055 charset.iso_revision = XINT (val);
1056 }
1057
1058 val = args[charset_arg_emacs_mule_id];
1059 if (NILP (val))
1060 charset.emacs_mule_id = -1;
1061 else
1062 {
1063 CHECK_NATNUM (val);
1064 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1065 error ("Invalid emacs-mule-id: %d", XINT (val));
1066 charset.emacs_mule_id = XINT (val);
1067 }
1068
1069 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1070
1071 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1072
1073 charset.unified_p = 0;
1074
1075 bzero (charset.fast_map, sizeof (charset.fast_map));
1076
1077 if (! NILP (args[charset_arg_code_offset]))
1078 {
1079 val = args[charset_arg_code_offset];
1080 CHECK_NUMBER (val);
1081
1082 charset.method = CHARSET_METHOD_OFFSET;
1083 charset.code_offset = XINT (val);
1084
1085 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1086 charset.min_char = i + charset.code_offset;
1087 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1088 charset.max_char = i + charset.code_offset;
1089 if (charset.max_char > MAX_CHAR)
1090 error ("Unsupported max char: %d", charset.max_char);
1091
1092 i = (charset.min_char >> 7) << 7;
1093 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1094 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1095 i = (i >> 12) << 12;
1096 for (; i <= charset.max_char; i += 0x1000)
1097 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1098 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1099 charset.ascii_compatible_p = 1;
1100 }
1101 else if (! NILP (args[charset_arg_map]))
1102 {
1103 val = args[charset_arg_map];
1104 ASET (attrs, charset_map, val);
1105 charset.method = CHARSET_METHOD_MAP;
1106 }
1107 else if (! NILP (args[charset_arg_subset]))
1108 {
1109 Lisp_Object parent;
1110 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1111 struct charset *parent_charset;
1112
1113 val = args[charset_arg_subset];
1114 parent = Fcar (val);
1115 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1116 parent_min_code = Fnth (make_number (1), val);
1117 CHECK_NATNUM (parent_min_code);
1118 parent_max_code = Fnth (make_number (2), val);
1119 CHECK_NATNUM (parent_max_code);
1120 parent_code_offset = Fnth (make_number (3), val);
1121 CHECK_NUMBER (parent_code_offset);
1122 val = Fmake_vector (make_number (4), Qnil);
1123 ASET (val, 0, make_number (parent_charset->id));
1124 ASET (val, 1, parent_min_code);
1125 ASET (val, 2, parent_max_code);
1126 ASET (val, 3, parent_code_offset);
1127 ASET (attrs, charset_subset, val);
1128
1129 charset.method = CHARSET_METHOD_SUBSET;
1130 /* Here, we just copy the parent's fast_map. It's not accurate,
1131 but at least it works for quickly detecting which character
1132 DOESN'T belong to this charset. */
1133 for (i = 0; i < 190; i++)
1134 charset.fast_map[i] = parent_charset->fast_map[i];
1135
1136 /* We also copy these for parents. */
1137 charset.min_char = parent_charset->min_char;
1138 charset.max_char = parent_charset->max_char;
1139 }
1140 else if (! NILP (args[charset_arg_superset]))
1141 {
1142 val = args[charset_arg_superset];
1143 charset.method = CHARSET_METHOD_SUPERSET;
1144 val = Fcopy_sequence (val);
1145 ASET (attrs, charset_superset, val);
1146
1147 charset.min_char = MAX_CHAR;
1148 charset.max_char = 0;
1149 for (; ! NILP (val); val = Fcdr (val))
1150 {
1151 Lisp_Object elt, car_part, cdr_part;
1152 int this_id, offset;
1153 struct charset *this_charset;
1154
1155 elt = Fcar (val);
1156 if (CONSP (elt))
1157 {
1158 car_part = XCAR (elt);
1159 cdr_part = XCDR (elt);
1160 CHECK_CHARSET_GET_ID (car_part, this_id);
1161 CHECK_NUMBER (cdr_part);
1162 offset = XINT (cdr_part);
1163 }
1164 else
1165 {
1166 CHECK_CHARSET_GET_ID (elt, this_id);
1167 offset = 0;
1168 }
1169 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1170
1171 this_charset = CHARSET_FROM_ID (this_id);
1172 if (charset.min_char > this_charset->min_char)
1173 charset.min_char = this_charset->min_char;
1174 if (charset.max_char < this_charset->max_char)
1175 charset.max_char = this_charset->max_char;
1176 for (i = 0; i < 190; i++)
1177 charset.fast_map[i] |= this_charset->fast_map[i];
1178 }
1179 }
1180 else
1181 error ("None of :code-offset, :map, :parents are specified");
1182
1183 val = args[charset_arg_unify_map];
1184 if (! NILP (val) && !STRINGP (val))
1185 CHECK_VECTOR (val);
1186 ASET (attrs, charset_unify_map, val);
1187
1188 CHECK_LIST (args[charset_arg_plist]);
1189 ASET (attrs, charset_plist, args[charset_arg_plist]);
1190
1191 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1192 &hash_code);
1193 if (charset.hash_index >= 0)
1194 {
1195 new_definition_p = 0;
1196 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1197 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1198 }
1199 else
1200 {
1201 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1202 hash_code);
1203 if (charset_table_used == charset_table_size)
1204 {
1205 struct charset *new_table
1206 = (struct charset *) xmalloc (sizeof (struct charset)
1207 * (charset_table_size + 16));
1208 bcopy (charset_table, new_table,
1209 sizeof (struct charset) * charset_table_size);
1210 charset_table_size += 16;
1211 charset_table = new_table;
1212 }
1213 id = charset_table_used++;
1214 new_definition_p = 1;
1215 }
1216
1217 ASET (attrs, charset_id, make_number (id));
1218 charset.id = id;
1219 charset_table[id] = charset;
1220
1221 if (charset.method == CHARSET_METHOD_MAP)
1222 {
1223 load_charset (&charset, 0);
1224 charset_table[id] = charset;
1225 }
1226
1227 if (charset.iso_final >= 0)
1228 {
1229 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1230 charset.iso_final) = id;
1231 if (new_definition_p)
1232 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1233 Fcons (make_number (id), Qnil));
1234 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1235 charset_jisx0201_roman = id;
1236 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1237 charset_jisx0208_1978 = id;
1238 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1239 charset_jisx0208 = id;
1240 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1241 charset_ksc5601 = id;
1242 }
1243
1244 if (charset.emacs_mule_id >= 0)
1245 {
1246 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1247 if (charset.emacs_mule_id < 0xA0)
1248 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1249 else
1250 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1251 if (new_definition_p)
1252 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1253 Fcons (make_number (id), Qnil));
1254 }
1255
1256 if (new_definition_p)
1257 {
1258 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1259 if (charset.supplementary_p)
1260 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1261 Fcons (make_number (id), Qnil));
1262 else
1263 {
1264 Lisp_Object tail;
1265
1266 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1267 {
1268 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1269
1270 if (cs->supplementary_p)
1271 break;
1272 }
1273 if (EQ (tail, Vcharset_ordered_list))
1274 Vcharset_ordered_list = Fcons (make_number (id),
1275 Vcharset_ordered_list);
1276 else if (NILP (tail))
1277 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1278 Fcons (make_number (id), Qnil));
1279 else
1280 {
1281 val = Fcons (XCAR (tail), XCDR (tail));
1282 XSETCDR (tail, val);
1283 XSETCAR (tail, make_number (id));
1284 }
1285 }
1286 charset_ordered_list_tick++;
1287 }
1288
1289 return Qnil;
1290 }
1291
1292
1293 /* Same as Fdefine_charset_internal but arguments are more convenient
1294 to call from C (typically in syms_of_charset). This can define a
1295 charset of `offset' method only. Return the ID of the new
1296 charset. */
1297
1298 static int
1299 define_charset_internal (name, dimension, code_space, min_code, max_code,
1300 iso_final, iso_revision, emacs_mule_id,
1301 ascii_compatible, supplementary,
1302 code_offset)
1303 Lisp_Object name;
1304 int dimension;
1305 unsigned char *code_space;
1306 unsigned min_code, max_code;
1307 int iso_final, iso_revision, emacs_mule_id;
1308 int ascii_compatible, supplementary;
1309 int code_offset;
1310 {
1311 Lisp_Object args[charset_arg_max];
1312 Lisp_Object plist[14];
1313 Lisp_Object val;
1314 int i;
1315
1316 args[charset_arg_name] = name;
1317 args[charset_arg_dimension] = make_number (dimension);
1318 val = Fmake_vector (make_number (8), make_number (0));
1319 for (i = 0; i < 8; i++)
1320 ASET (val, i, make_number (code_space[i]));
1321 args[charset_arg_code_space] = val;
1322 args[charset_arg_min_code] = make_number (min_code);
1323 args[charset_arg_max_code] = make_number (max_code);
1324 args[charset_arg_iso_final]
1325 = (iso_final < 0 ? Qnil : make_number (iso_final));
1326 args[charset_arg_iso_revision] = make_number (iso_revision);
1327 args[charset_arg_emacs_mule_id]
1328 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1329 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1330 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1331 args[charset_arg_invalid_code] = Qnil;
1332 args[charset_arg_code_offset] = make_number (code_offset);
1333 args[charset_arg_map] = Qnil;
1334 args[charset_arg_subset] = Qnil;
1335 args[charset_arg_superset] = Qnil;
1336 args[charset_arg_unify_map] = Qnil;
1337
1338 plist[0] = intern_c_string (":name");
1339 plist[1] = args[charset_arg_name];
1340 plist[2] = intern_c_string (":dimension");
1341 plist[3] = args[charset_arg_dimension];
1342 plist[4] = intern_c_string (":code-space");
1343 plist[5] = args[charset_arg_code_space];
1344 plist[6] = intern_c_string (":iso-final-char");
1345 plist[7] = args[charset_arg_iso_final];
1346 plist[8] = intern_c_string (":emacs-mule-id");
1347 plist[9] = args[charset_arg_emacs_mule_id];
1348 plist[10] = intern_c_string (":ascii-compatible-p");
1349 plist[11] = args[charset_arg_ascii_compatible_p];
1350 plist[12] = intern_c_string (":code-offset");
1351 plist[13] = args[charset_arg_code_offset];
1352
1353 args[charset_arg_plist] = Flist (14, plist);
1354 Fdefine_charset_internal (charset_arg_max, args);
1355
1356 return XINT (CHARSET_SYMBOL_ID (name));
1357 }
1358
1359
1360 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1361 Sdefine_charset_alias, 2, 2, 0,
1362 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1363 (alias, charset)
1364 Lisp_Object alias, charset;
1365 {
1366 Lisp_Object attr;
1367
1368 CHECK_CHARSET_GET_ATTR (charset, attr);
1369 Fputhash (alias, attr, Vcharset_hash_table);
1370 Vcharset_list = Fcons (alias, Vcharset_list);
1371 return Qnil;
1372 }
1373
1374
1375 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1376 doc: /* Return the property list of CHARSET. */)
1377 (charset)
1378 Lisp_Object charset;
1379 {
1380 Lisp_Object attrs;
1381
1382 CHECK_CHARSET_GET_ATTR (charset, attrs);
1383 return CHARSET_ATTR_PLIST (attrs);
1384 }
1385
1386
1387 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1388 doc: /* Set CHARSET's property list to PLIST. */)
1389 (charset, plist)
1390 Lisp_Object charset, plist;
1391 {
1392 Lisp_Object attrs;
1393
1394 CHECK_CHARSET_GET_ATTR (charset, attrs);
1395 CHARSET_ATTR_PLIST (attrs) = plist;
1396 return plist;
1397 }
1398
1399
1400 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1401 doc: /* Unify characters of CHARSET with Unicode.
1402 This means reading the relevant file and installing the table defined
1403 by CHARSET's `:unify-map' property.
1404
1405 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1406 the same meaning as the `:unify-map' attribute in the function
1407 `define-charset' (which see).
1408
1409 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1410 (charset, unify_map, deunify)
1411 Lisp_Object charset, unify_map, deunify;
1412 {
1413 int id;
1414 struct charset *cs;
1415
1416 CHECK_CHARSET_GET_ID (charset, id);
1417 cs = CHARSET_FROM_ID (id);
1418 if (NILP (deunify)
1419 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1420 : ! CHARSET_UNIFIED_P (cs))
1421 return Qnil;
1422
1423 CHARSET_UNIFIED_P (cs) = 0;
1424 if (NILP (deunify))
1425 {
1426 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1427 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1428 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1429 if (NILP (unify_map))
1430 unify_map = CHARSET_UNIFY_MAP (cs);
1431 else
1432 {
1433 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1434 signal_error ("Bad unify-map", unify_map);
1435 CHARSET_UNIFY_MAP (cs) = unify_map;
1436 }
1437 if (NILP (Vchar_unify_table))
1438 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1439 char_table_set_range (Vchar_unify_table,
1440 cs->min_char, cs->max_char, charset);
1441 CHARSET_UNIFIED_P (cs) = 1;
1442 }
1443 else if (CHAR_TABLE_P (Vchar_unify_table))
1444 {
1445 int min_code = CHARSET_MIN_CODE (cs);
1446 int max_code = CHARSET_MAX_CODE (cs);
1447 int min_char = DECODE_CHAR (cs, min_code);
1448 int max_char = DECODE_CHAR (cs, max_code);
1449
1450 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1451 }
1452
1453 return Qnil;
1454 }
1455
1456 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1457 Sget_unused_iso_final_char, 2, 2, 0,
1458 doc: /*
1459 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1460 DIMENSION is the number of bytes to represent a character: 1 or 2.
1461 CHARS is the number of characters in a dimension: 94 or 96.
1462
1463 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1464 If there's no unused final char for the specified kind of charset,
1465 return nil. */)
1466 (dimension, chars)
1467 Lisp_Object dimension, chars;
1468 {
1469 int final_char;
1470
1471 CHECK_NUMBER (dimension);
1472 CHECK_NUMBER (chars);
1473 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1474 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1475 if (XINT (chars) != 94 && XINT (chars) != 96)
1476 args_out_of_range_3 (chars, make_number (94), make_number (96));
1477 for (final_char = '0'; final_char <= '?'; final_char++)
1478 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1479 break;
1480 return (final_char <= '?' ? make_number (final_char) : Qnil);
1481 }
1482
1483 static void
1484 check_iso_charset_parameter (dimension, chars, final_char)
1485 Lisp_Object dimension, chars, final_char;
1486 {
1487 CHECK_NATNUM (dimension);
1488 CHECK_NATNUM (chars);
1489 CHECK_NATNUM (final_char);
1490
1491 if (XINT (dimension) > 3)
1492 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1493 if (XINT (chars) != 94 && XINT (chars) != 96)
1494 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1495 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1496 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1497 }
1498
1499
1500 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1501 4, 4, 0,
1502 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1503
1504 On decoding by an ISO-2022 base coding system, when a charset
1505 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1506 if CHARSET is designated instead. */)
1507 (dimension, chars, final_char, charset)
1508 Lisp_Object dimension, chars, final_char, charset;
1509 {
1510 int id;
1511 int chars_flag;
1512
1513 CHECK_CHARSET_GET_ID (charset, id);
1514 check_iso_charset_parameter (dimension, chars, final_char);
1515 chars_flag = XINT (chars) == 96;
1516 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1517 return Qnil;
1518 }
1519
1520
1521 /* Return information about charsets in the text at PTR of NBYTES
1522 bytes, which are NCHARS characters. The value is:
1523
1524 0: Each character is represented by one byte. This is always
1525 true for a unibyte string. For a multibyte string, true if
1526 it contains only ASCII characters.
1527
1528 1: No charsets other than ascii, control-1, and latin-1 are
1529 found.
1530
1531 2: Otherwise.
1532 */
1533
1534 int
1535 string_xstring_p (string)
1536 Lisp_Object string;
1537 {
1538 const unsigned char *p = SDATA (string);
1539 const unsigned char *endp = p + SBYTES (string);
1540
1541 if (SCHARS (string) == SBYTES (string))
1542 return 0;
1543
1544 while (p < endp)
1545 {
1546 int c = STRING_CHAR_ADVANCE (p);
1547
1548 if (c >= 0x100)
1549 return 2;
1550 }
1551 return 1;
1552 }
1553
1554
1555 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1556
1557 CHARSETS is a vector. If Nth element is non-nil, it means the
1558 charset whose id is N is already found.
1559
1560 It may lookup a translation table TABLE if supplied. */
1561
1562 static void
1563 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1564 const unsigned char *ptr;
1565 EMACS_INT nchars, nbytes;
1566 Lisp_Object charsets, table;
1567 int multibyte;
1568 {
1569 const unsigned char *pend = ptr + nbytes;
1570
1571 if (nchars == nbytes)
1572 {
1573 if (multibyte)
1574 ASET (charsets, charset_ascii, Qt);
1575 else
1576 while (ptr < pend)
1577 {
1578 int c = *ptr++;
1579
1580 if (!NILP (table))
1581 c = translate_char (table, c);
1582 if (ASCII_BYTE_P (c))
1583 ASET (charsets, charset_ascii, Qt);
1584 else
1585 ASET (charsets, charset_eight_bit, Qt);
1586 }
1587 }
1588 else
1589 {
1590 while (ptr < pend)
1591 {
1592 int c = STRING_CHAR_ADVANCE (ptr);
1593 struct charset *charset;
1594
1595 if (!NILP (table))
1596 c = translate_char (table, c);
1597 charset = CHAR_CHARSET (c);
1598 ASET (charsets, CHARSET_ID (charset), Qt);
1599 }
1600 }
1601 }
1602
1603 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1604 2, 3, 0,
1605 doc: /* Return a list of charsets in the region between BEG and END.
1606 BEG and END are buffer positions.
1607 Optional arg TABLE if non-nil is a translation table to look up.
1608
1609 If the current buffer is unibyte, the returned list may contain
1610 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1611 (beg, end, table)
1612 Lisp_Object beg, end, table;
1613 {
1614 Lisp_Object charsets;
1615 EMACS_INT from, from_byte, to, stop, stop_byte;
1616 int i;
1617 Lisp_Object val;
1618 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1619
1620 validate_region (&beg, &end);
1621 from = XFASTINT (beg);
1622 stop = to = XFASTINT (end);
1623
1624 if (from < GPT && GPT < to)
1625 {
1626 stop = GPT;
1627 stop_byte = GPT_BYTE;
1628 }
1629 else
1630 stop_byte = CHAR_TO_BYTE (stop);
1631
1632 from_byte = CHAR_TO_BYTE (from);
1633
1634 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1635 while (1)
1636 {
1637 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1638 stop_byte - from_byte, charsets, table,
1639 multibyte);
1640 if (stop < to)
1641 {
1642 from = stop, from_byte = stop_byte;
1643 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1644 }
1645 else
1646 break;
1647 }
1648
1649 val = Qnil;
1650 for (i = charset_table_used - 1; i >= 0; i--)
1651 if (!NILP (AREF (charsets, i)))
1652 val = Fcons (CHARSET_NAME (charset_table + i), val);
1653 return val;
1654 }
1655
1656 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1657 1, 2, 0,
1658 doc: /* Return a list of charsets in STR.
1659 Optional arg TABLE if non-nil is a translation table to look up.
1660
1661 If STR is unibyte, the returned list may contain
1662 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1663 (str, table)
1664 Lisp_Object str, table;
1665 {
1666 Lisp_Object charsets;
1667 int i;
1668 Lisp_Object val;
1669
1670 CHECK_STRING (str);
1671
1672 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1673 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1674 charsets, table,
1675 STRING_MULTIBYTE (str));
1676 val = Qnil;
1677 for (i = charset_table_used - 1; i >= 0; i--)
1678 if (!NILP (AREF (charsets, i)))
1679 val = Fcons (CHARSET_NAME (charset_table + i), val);
1680 return val;
1681 }
1682
1683 \f
1684
1685 /* Return a unified character code for C (>= 0x110000). VAL is a
1686 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1687 charset symbol. */
1688 int
1689 maybe_unify_char (c, val)
1690 int c;
1691 Lisp_Object val;
1692 {
1693 struct charset *charset;
1694
1695 if (INTEGERP (val))
1696 return XINT (val);
1697 if (NILP (val))
1698 return c;
1699
1700 CHECK_CHARSET_GET_CHARSET (val, charset);
1701 load_charset (charset, 1);
1702 if (! inhibit_load_charset_map)
1703 {
1704 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1705 if (! NILP (val))
1706 c = XINT (val);
1707 }
1708 else
1709 {
1710 int code_index = c - CHARSET_CODE_OFFSET (charset);
1711 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1712
1713 if (unified > 0)
1714 c = unified;
1715 }
1716 return c;
1717 }
1718
1719
1720 /* Return a character correponding to the code-point CODE of
1721 CHARSET. */
1722
1723 int
1724 decode_char (charset, code)
1725 struct charset *charset;
1726 unsigned code;
1727 {
1728 int c, char_index;
1729 enum charset_method method = CHARSET_METHOD (charset);
1730
1731 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1732 return -1;
1733
1734 if (method == CHARSET_METHOD_SUBSET)
1735 {
1736 Lisp_Object subset_info;
1737
1738 subset_info = CHARSET_SUBSET (charset);
1739 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1740 code -= XINT (AREF (subset_info, 3));
1741 if (code < XFASTINT (AREF (subset_info, 1))
1742 || code > XFASTINT (AREF (subset_info, 2)))
1743 c = -1;
1744 else
1745 c = DECODE_CHAR (charset, code);
1746 }
1747 else if (method == CHARSET_METHOD_SUPERSET)
1748 {
1749 Lisp_Object parents;
1750
1751 parents = CHARSET_SUPERSET (charset);
1752 c = -1;
1753 for (; CONSP (parents); parents = XCDR (parents))
1754 {
1755 int id = XINT (XCAR (XCAR (parents)));
1756 int code_offset = XINT (XCDR (XCAR (parents)));
1757 unsigned this_code = code - code_offset;
1758
1759 charset = CHARSET_FROM_ID (id);
1760 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1761 break;
1762 }
1763 }
1764 else
1765 {
1766 char_index = CODE_POINT_TO_INDEX (charset, code);
1767 if (char_index < 0)
1768 return -1;
1769
1770 if (method == CHARSET_METHOD_MAP)
1771 {
1772 Lisp_Object decoder;
1773
1774 decoder = CHARSET_DECODER (charset);
1775 if (! VECTORP (decoder))
1776 {
1777 load_charset (charset, 1);
1778 decoder = CHARSET_DECODER (charset);
1779 }
1780 if (VECTORP (decoder))
1781 c = XINT (AREF (decoder, char_index));
1782 else
1783 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1784 }
1785 else /* method == CHARSET_METHOD_OFFSET */
1786 {
1787 c = char_index + CHARSET_CODE_OFFSET (charset);
1788 if (CHARSET_UNIFIED_P (charset)
1789 && c > MAX_UNICODE_CHAR)
1790 MAYBE_UNIFY_CHAR (c);
1791 }
1792 }
1793
1794 return c;
1795 }
1796
1797 /* Variable used temporarily by the macro ENCODE_CHAR. */
1798 Lisp_Object charset_work;
1799
1800 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1801 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1802 use CHARSET's strict_max_char instead of max_char. */
1803
1804 unsigned
1805 encode_char (charset, c)
1806 struct charset *charset;
1807 int c;
1808 {
1809 unsigned code;
1810 enum charset_method method = CHARSET_METHOD (charset);
1811
1812 if (CHARSET_UNIFIED_P (charset))
1813 {
1814 Lisp_Object deunifier;
1815 int code_index = -1;
1816
1817 deunifier = CHARSET_DEUNIFIER (charset);
1818 if (! CHAR_TABLE_P (deunifier))
1819 {
1820 load_charset (charset, 2);
1821 deunifier = CHARSET_DEUNIFIER (charset);
1822 }
1823 if (CHAR_TABLE_P (deunifier))
1824 {
1825 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1826
1827 if (INTEGERP (deunified))
1828 code_index = XINT (deunified);
1829 }
1830 else
1831 {
1832 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1833 }
1834 if (code_index >= 0)
1835 c = CHARSET_CODE_OFFSET (charset) + code_index;
1836 }
1837
1838 if (method == CHARSET_METHOD_SUBSET)
1839 {
1840 Lisp_Object subset_info;
1841 struct charset *this_charset;
1842
1843 subset_info = CHARSET_SUBSET (charset);
1844 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1845 code = ENCODE_CHAR (this_charset, c);
1846 if (code == CHARSET_INVALID_CODE (this_charset)
1847 || code < XFASTINT (AREF (subset_info, 1))
1848 || code > XFASTINT (AREF (subset_info, 2)))
1849 return CHARSET_INVALID_CODE (charset);
1850 code += XINT (AREF (subset_info, 3));
1851 return code;
1852 }
1853
1854 if (method == CHARSET_METHOD_SUPERSET)
1855 {
1856 Lisp_Object parents;
1857
1858 parents = CHARSET_SUPERSET (charset);
1859 for (; CONSP (parents); parents = XCDR (parents))
1860 {
1861 int id = XINT (XCAR (XCAR (parents)));
1862 int code_offset = XINT (XCDR (XCAR (parents)));
1863 struct charset *this_charset = CHARSET_FROM_ID (id);
1864
1865 code = ENCODE_CHAR (this_charset, c);
1866 if (code != CHARSET_INVALID_CODE (this_charset))
1867 return code + code_offset;
1868 }
1869 return CHARSET_INVALID_CODE (charset);
1870 }
1871
1872 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1873 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1874 return CHARSET_INVALID_CODE (charset);
1875
1876 if (method == CHARSET_METHOD_MAP)
1877 {
1878 Lisp_Object encoder;
1879 Lisp_Object val;
1880
1881 encoder = CHARSET_ENCODER (charset);
1882 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1883 {
1884 load_charset (charset, 2);
1885 encoder = CHARSET_ENCODER (charset);
1886 }
1887 if (CHAR_TABLE_P (encoder))
1888 {
1889 val = CHAR_TABLE_REF (encoder, c);
1890 if (NILP (val))
1891 return CHARSET_INVALID_CODE (charset);
1892 code = XINT (val);
1893 if (! CHARSET_COMPACT_CODES_P (charset))
1894 code = INDEX_TO_CODE_POINT (charset, code);
1895 }
1896 else
1897 {
1898 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1899 code = INDEX_TO_CODE_POINT (charset, code);
1900 }
1901 }
1902 else /* method == CHARSET_METHOD_OFFSET */
1903 {
1904 int code_index = c - CHARSET_CODE_OFFSET (charset);
1905
1906 code = INDEX_TO_CODE_POINT (charset, code_index);
1907 }
1908
1909 return code;
1910 }
1911
1912
1913 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1914 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1915 Return nil if CODE-POINT is not valid in CHARSET.
1916
1917 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1918
1919 Optional argument RESTRICTION specifies a way to map the pair of CCS
1920 and CODE-POINT to a character. Currently not supported and just ignored. */)
1921 (charset, code_point, restriction)
1922 Lisp_Object charset, code_point, restriction;
1923 {
1924 int c, id;
1925 unsigned code;
1926 struct charset *charsetp;
1927
1928 CHECK_CHARSET_GET_ID (charset, id);
1929 if (CONSP (code_point))
1930 {
1931 CHECK_NATNUM_CAR (code_point);
1932 CHECK_NATNUM_CDR (code_point);
1933 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1934 }
1935 else
1936 {
1937 CHECK_NATNUM (code_point);
1938 code = XINT (code_point);
1939 }
1940 charsetp = CHARSET_FROM_ID (id);
1941 c = DECODE_CHAR (charsetp, code);
1942 return (c >= 0 ? make_number (c) : Qnil);
1943 }
1944
1945
1946 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1947 doc: /* Encode the character CH into a code-point of CHARSET.
1948 Return nil if CHARSET doesn't include CH.
1949
1950 Optional argument RESTRICTION specifies a way to map CH to a
1951 code-point in CCS. Currently not supported and just ignored. */)
1952 (ch, charset, restriction)
1953 Lisp_Object ch, charset, restriction;
1954 {
1955 int id;
1956 unsigned code;
1957 struct charset *charsetp;
1958
1959 CHECK_CHARSET_GET_ID (charset, id);
1960 CHECK_NATNUM (ch);
1961 charsetp = CHARSET_FROM_ID (id);
1962 code = ENCODE_CHAR (charsetp, XINT (ch));
1963 if (code == CHARSET_INVALID_CODE (charsetp))
1964 return Qnil;
1965 if (code > 0x7FFFFFF)
1966 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1967 return make_number (code);
1968 }
1969
1970
1971 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1972 doc:
1973 /* Return a character of CHARSET whose position codes are CODEn.
1974
1975 CODE1 through CODE4 are optional, but if you don't supply sufficient
1976 position codes, it is assumed that the minimum code in each dimension
1977 is specified. */)
1978 (charset, code1, code2, code3, code4)
1979 Lisp_Object charset, code1, code2, code3, code4;
1980 {
1981 int id, dimension;
1982 struct charset *charsetp;
1983 unsigned code;
1984 int c;
1985
1986 CHECK_CHARSET_GET_ID (charset, id);
1987 charsetp = CHARSET_FROM_ID (id);
1988
1989 dimension = CHARSET_DIMENSION (charsetp);
1990 if (NILP (code1))
1991 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1992 ? 0 : CHARSET_MIN_CODE (charsetp));
1993 else
1994 {
1995 CHECK_NATNUM (code1);
1996 if (XFASTINT (code1) >= 0x100)
1997 args_out_of_range (make_number (0xFF), code1);
1998 code = XFASTINT (code1);
1999
2000 if (dimension > 1)
2001 {
2002 code <<= 8;
2003 if (NILP (code2))
2004 code |= charsetp->code_space[(dimension - 2) * 4];
2005 else
2006 {
2007 CHECK_NATNUM (code2);
2008 if (XFASTINT (code2) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code2);
2010 code |= XFASTINT (code2);
2011 }
2012
2013 if (dimension > 2)
2014 {
2015 code <<= 8;
2016 if (NILP (code3))
2017 code |= charsetp->code_space[(dimension - 3) * 4];
2018 else
2019 {
2020 CHECK_NATNUM (code3);
2021 if (XFASTINT (code3) >= 0x100)
2022 args_out_of_range (make_number (0xFF), code3);
2023 code |= XFASTINT (code3);
2024 }
2025
2026 if (dimension > 3)
2027 {
2028 code <<= 8;
2029 if (NILP (code4))
2030 code |= charsetp->code_space[0];
2031 else
2032 {
2033 CHECK_NATNUM (code4);
2034 if (XFASTINT (code4) >= 0x100)
2035 args_out_of_range (make_number (0xFF), code4);
2036 code |= XFASTINT (code4);
2037 }
2038 }
2039 }
2040 }
2041 }
2042
2043 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2044 code &= 0x7F7F7F7F;
2045 c = DECODE_CHAR (charsetp, code);
2046 if (c < 0)
2047 error ("Invalid code(s)");
2048 return make_number (c);
2049 }
2050
2051
2052 /* Return the first charset in CHARSET_LIST that contains C.
2053 CHARSET_LIST is a list of charset IDs. If it is nil, use
2054 Vcharset_ordered_list. */
2055
2056 struct charset *
2057 char_charset (c, charset_list, code_return)
2058 int c;
2059 Lisp_Object charset_list;
2060 unsigned *code_return;
2061 {
2062 int maybe_null = 0;
2063
2064 if (NILP (charset_list))
2065 charset_list = Vcharset_ordered_list;
2066 else
2067 maybe_null = 1;
2068
2069 while (CONSP (charset_list))
2070 {
2071 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2072 unsigned code = ENCODE_CHAR (charset, c);
2073
2074 if (code != CHARSET_INVALID_CODE (charset))
2075 {
2076 if (code_return)
2077 *code_return = code;
2078 return charset;
2079 }
2080 charset_list = XCDR (charset_list);
2081 if (c <= MAX_UNICODE_CHAR
2082 && EQ (charset_list, Vcharset_non_preferred_head))
2083 return CHARSET_FROM_ID (charset_unicode);
2084 }
2085 return (maybe_null ? NULL
2086 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2087 : CHARSET_FROM_ID (charset_eight_bit));
2088 }
2089
2090
2091 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2092 doc:
2093 /*Return list of charset and one to four position-codes of CH.
2094 The charset is decided by the current priority order of charsets.
2095 A position-code is a byte value of each dimension of the code-point of
2096 CH in the charset. */)
2097 (ch)
2098 Lisp_Object ch;
2099 {
2100 struct charset *charset;
2101 int c, dimension;
2102 unsigned code;
2103 Lisp_Object val;
2104
2105 CHECK_CHARACTER (ch);
2106 c = XFASTINT (ch);
2107 charset = CHAR_CHARSET (c);
2108 if (! charset)
2109 abort ();
2110 code = ENCODE_CHAR (charset, c);
2111 if (code == CHARSET_INVALID_CODE (charset))
2112 abort ();
2113 dimension = CHARSET_DIMENSION (charset);
2114 for (val = Qnil; dimension > 0; dimension--)
2115 {
2116 val = Fcons (make_number (code & 0xFF), val);
2117 code >>= 8;
2118 }
2119 return Fcons (CHARSET_NAME (charset), val);
2120 }
2121
2122
2123 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2124 doc: /* Return the charset of highest priority that contains CH.
2125 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2126 from which to find the charset. It may also be a coding system. In
2127 that case, find the charset from what supported by that coding system. */)
2128 (ch, restriction)
2129 Lisp_Object ch, restriction;
2130 {
2131 struct charset *charset;
2132
2133 CHECK_CHARACTER (ch);
2134 if (NILP (restriction))
2135 charset = CHAR_CHARSET (XINT (ch));
2136 else
2137 {
2138 Lisp_Object charset_list;
2139
2140 if (CONSP (restriction))
2141 {
2142 for (charset_list = Qnil; CONSP (restriction);
2143 restriction = XCDR (restriction))
2144 {
2145 int id;
2146
2147 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2148 charset_list = Fcons (make_number (id), charset_list);
2149 }
2150 charset_list = Fnreverse (charset_list);
2151 }
2152 else
2153 charset_list = coding_system_charset_list (restriction);
2154 charset = char_charset (XINT (ch), charset_list, NULL);
2155 if (! charset)
2156 return Qnil;
2157 }
2158 return (CHARSET_NAME (charset));
2159 }
2160
2161
2162 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2163 doc: /*
2164 Return charset of a character in the current buffer at position POS.
2165 If POS is nil, it defauls to the current point.
2166 If POS is out of range, the value is nil. */)
2167 (pos)
2168 Lisp_Object pos;
2169 {
2170 Lisp_Object ch;
2171 struct charset *charset;
2172
2173 ch = Fchar_after (pos);
2174 if (! INTEGERP (ch))
2175 return ch;
2176 charset = CHAR_CHARSET (XINT (ch));
2177 return (CHARSET_NAME (charset));
2178 }
2179
2180
2181 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2182 doc: /*
2183 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2184
2185 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2186 by their DIMENSION, CHARS, and FINAL-CHAR,
2187 whereas Emacs distinguishes them by charset symbol.
2188 See the documentation of the function `charset-info' for the meanings of
2189 DIMENSION, CHARS, and FINAL-CHAR. */)
2190 (dimension, chars, final_char)
2191 Lisp_Object dimension, chars, final_char;
2192 {
2193 int id;
2194 int chars_flag;
2195
2196 check_iso_charset_parameter (dimension, chars, final_char);
2197 chars_flag = XFASTINT (chars) == 96;
2198 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2199 XFASTINT (final_char));
2200 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2201 }
2202
2203
2204 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2205 0, 0, 0,
2206 doc: /*
2207 Internal use only.
2208 Clear temporary charset mapping tables.
2209 It should be called only from temacs invoked for dumping. */)
2210 ()
2211 {
2212 if (temp_charset_work)
2213 {
2214 free (temp_charset_work);
2215 temp_charset_work = NULL;
2216 }
2217
2218 if (CHAR_TABLE_P (Vchar_unify_table))
2219 Foptimize_char_table (Vchar_unify_table, Qnil);
2220
2221 return Qnil;
2222 }
2223
2224 DEFUN ("charset-priority-list", Fcharset_priority_list,
2225 Scharset_priority_list, 0, 1, 0,
2226 doc: /* Return the list of charsets ordered by priority.
2227 HIGHESTP non-nil means just return the highest priority one. */)
2228 (highestp)
2229 Lisp_Object highestp;
2230 {
2231 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2232
2233 if (!NILP (highestp))
2234 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2235
2236 while (!NILP (list))
2237 {
2238 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2239 list = XCDR (list);
2240 }
2241 return Fnreverse (val);
2242 }
2243
2244 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2245 1, MANY, 0,
2246 doc: /* Assign higher priority to the charsets given as arguments.
2247 usage: (set-charset-priority &rest charsets) */)
2248 (nargs, args)
2249 int nargs;
2250 Lisp_Object *args;
2251 {
2252 Lisp_Object new_head, old_list, arglist[2];
2253 Lisp_Object list_2022, list_emacs_mule;
2254 int i, id;
2255
2256 old_list = Fcopy_sequence (Vcharset_ordered_list);
2257 new_head = Qnil;
2258 for (i = 0; i < nargs; i++)
2259 {
2260 CHECK_CHARSET_GET_ID (args[i], id);
2261 if (! NILP (Fmemq (make_number (id), old_list)))
2262 {
2263 old_list = Fdelq (make_number (id), old_list);
2264 new_head = Fcons (make_number (id), new_head);
2265 }
2266 }
2267 arglist[0] = Fnreverse (new_head);
2268 arglist[1] = Vcharset_non_preferred_head = old_list;
2269 Vcharset_ordered_list = Fnconc (2, arglist);
2270 charset_ordered_list_tick++;
2271
2272 charset_unibyte = -1;
2273 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2274 CONSP (old_list); old_list = XCDR (old_list))
2275 {
2276 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2277 list_2022 = Fcons (XCAR (old_list), list_2022);
2278 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2279 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2280 if (charset_unibyte < 0)
2281 {
2282 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2283
2284 if (CHARSET_DIMENSION (charset) == 1
2285 && CHARSET_ASCII_COMPATIBLE_P (charset)
2286 && CHARSET_MAX_CHAR (charset) >= 0x80)
2287 charset_unibyte = CHARSET_ID (charset);
2288 }
2289 }
2290 Viso_2022_charset_list = Fnreverse (list_2022);
2291 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2292 if (charset_unibyte < 0)
2293 charset_unibyte = charset_iso_8859_1;
2294
2295 return Qnil;
2296 }
2297
2298 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2299 0, 1, 0,
2300 doc: /* Internal use only.
2301 Return charset identification number of CHARSET. */)
2302 (charset)
2303 Lisp_Object charset;
2304 {
2305 int id;
2306
2307 CHECK_CHARSET_GET_ID (charset, id);
2308 return make_number (id);
2309 }
2310
2311 \f
2312 void
2313 init_charset ()
2314 {
2315 Lisp_Object tempdir;
2316 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2317 if (access ((char *) SDATA (tempdir), 0) < 0)
2318 {
2319 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2320 Emacs will not function correctly without the character map files.\n\
2321 Please check your installation!\n",
2322 tempdir);
2323 /* TODO should this be a fatal error? (Bug#909) */
2324 }
2325
2326 Vcharset_map_path = Fcons (tempdir, Qnil);
2327 }
2328
2329
2330 void
2331 init_charset_once ()
2332 {
2333 int i, j, k;
2334
2335 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2336 for (j = 0; j < ISO_MAX_CHARS; j++)
2337 for (k = 0; k < ISO_MAX_FINAL; k++)
2338 iso_charset_table[i][j][k] = -1;
2339
2340 for (i = 0; i < 256; i++)
2341 emacs_mule_charset[i] = NULL;
2342
2343 charset_jisx0201_roman = -1;
2344 charset_jisx0208_1978 = -1;
2345 charset_jisx0208 = -1;
2346 charset_ksc5601 = -1;
2347 }
2348
2349 #ifdef emacs
2350
2351 void
2352 syms_of_charset ()
2353 {
2354 DEFSYM (Qcharsetp, "charsetp");
2355
2356 DEFSYM (Qascii, "ascii");
2357 DEFSYM (Qunicode, "unicode");
2358 DEFSYM (Qemacs, "emacs");
2359 DEFSYM (Qeight_bit, "eight-bit");
2360 DEFSYM (Qiso_8859_1, "iso-8859-1");
2361
2362 DEFSYM (Qgl, "gl");
2363 DEFSYM (Qgr, "gr");
2364
2365 staticpro (&Vcharset_ordered_list);
2366 Vcharset_ordered_list = Qnil;
2367
2368 staticpro (&Viso_2022_charset_list);
2369 Viso_2022_charset_list = Qnil;
2370
2371 staticpro (&Vemacs_mule_charset_list);
2372 Vemacs_mule_charset_list = Qnil;
2373
2374 /* Don't staticpro them here. It's done in syms_of_fns. */
2375 QCtest = intern (":test");
2376 Qeq = intern ("eq");
2377
2378 staticpro (&Vcharset_hash_table);
2379 {
2380 Lisp_Object args[2];
2381 args[0] = QCtest;
2382 args[1] = Qeq;
2383 Vcharset_hash_table = Fmake_hash_table (2, args);
2384 }
2385
2386 charset_table_size = 128;
2387 charset_table = ((struct charset *)
2388 xmalloc (sizeof (struct charset) * charset_table_size));
2389 charset_table_used = 0;
2390
2391 defsubr (&Scharsetp);
2392 defsubr (&Smap_charset_chars);
2393 defsubr (&Sdefine_charset_internal);
2394 defsubr (&Sdefine_charset_alias);
2395 defsubr (&Scharset_plist);
2396 defsubr (&Sset_charset_plist);
2397 defsubr (&Sunify_charset);
2398 defsubr (&Sget_unused_iso_final_char);
2399 defsubr (&Sdeclare_equiv_charset);
2400 defsubr (&Sfind_charset_region);
2401 defsubr (&Sfind_charset_string);
2402 defsubr (&Sdecode_char);
2403 defsubr (&Sencode_char);
2404 defsubr (&Ssplit_char);
2405 defsubr (&Smake_char);
2406 defsubr (&Schar_charset);
2407 defsubr (&Scharset_after);
2408 defsubr (&Siso_charset);
2409 defsubr (&Sclear_charset_maps);
2410 defsubr (&Scharset_priority_list);
2411 defsubr (&Sset_charset_priority);
2412 defsubr (&Scharset_id_internal);
2413
2414 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2415 doc: /* *List of directories to search for charset map files. */);
2416 Vcharset_map_path = Qnil;
2417
2418 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2419 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2420 inhibit_load_charset_map = 0;
2421
2422 DEFVAR_LISP ("charset-list", &Vcharset_list,
2423 doc: /* List of all charsets ever defined. */);
2424 Vcharset_list = Qnil;
2425
2426 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2427 doc: /* ISO639 language mnemonic symbol for the current language environment.
2428 If the current language environment is for multiple languages (e.g. "Latin-1"),
2429 the value may be a list of mnemonics. */);
2430 Vcurrent_iso639_language = Qnil;
2431
2432 charset_ascii
2433 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2434 0, 127, 'B', -1, 0, 1, 0, 0);
2435 charset_iso_8859_1
2436 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2437 0, 255, -1, -1, -1, 1, 0, 0);
2438 charset_unicode
2439 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2440 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2441 charset_emacs
2442 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2443 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2444 charset_eight_bit
2445 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2446 128, 255, -1, 0, -1, 0, 1,
2447 MAX_5_BYTE_CHAR + 1);
2448 charset_unibyte = charset_iso_8859_1;
2449 }
2450
2451 #endif /* emacs */
2452
2453 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2454 (do not change this comment) */