Merge from emacs-23
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
60
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
64
65 /* Table of struct charset. */
66 struct charset *charset_table;
67
68 static int charset_table_size;
69 static int charset_table_used;
70
71 Lisp_Object Qcharsetp;
72
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
86
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
92
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
95
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
98
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
101
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
105
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
110
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
113
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
116
117 struct charset *emacs_mule_charset[256];
118
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
123 Lisp_Object Vcharset_map_path;
124
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
127
128 Lisp_Object Vcurrent_iso639_language;
129
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to);
136
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
153
154
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
171
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
174
175 static struct
176 {
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
206
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
229 \f
230
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
234
235 struct charset_map_entries
236 {
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242 };
243
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
253
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
259
260 If CONTROL_FLAG is 2, setup the following tables.
261
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 */
268
269 static void
270 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
271 {
272 Lisp_Object vec, table;
273 unsigned max_code = CHARSET_MAX_CODE (charset);
274 int ascii_compatible_p = charset->ascii_compatible_p;
275 int min_char, max_char, nonascii_min_char;
276 int i;
277 unsigned char *fast_map = charset->fast_map;
278
279 if (n_entries <= 0)
280 return;
281
282 if (control_flag)
283 {
284 if (! inhibit_load_charset_map)
285 {
286 if (control_flag == 1)
287 {
288 if (charset->method == CHARSET_METHOD_MAP)
289 {
290 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
291
292 vec = CHARSET_DECODER (charset)
293 = Fmake_vector (make_number (n), make_number (-1));
294 }
295 else
296 {
297 char_table_set_range (Vchar_unify_table,
298 charset->min_char, charset->max_char,
299 Qnil);
300 }
301 }
302 else
303 {
304 table = Fmake_char_table (Qnil, Qnil);
305 if (charset->method == CHARSET_METHOD_MAP)
306 CHARSET_ENCODER (charset) = table;
307 else
308 CHARSET_DEUNIFIER (charset) = table;
309 }
310 }
311 else
312 {
313 if (! temp_charset_work)
314 temp_charset_work = malloc (sizeof (*temp_charset_work));
315 if (control_flag == 1)
316 {
317 memset (temp_charset_work->table.decoder, -1,
318 sizeof (int) * 0x10000);
319 }
320 else
321 {
322 memset (temp_charset_work->table.encoder, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work->zero_index_char = -1;
325 }
326 temp_charset_work->current = charset;
327 temp_charset_work->for_encoder = (control_flag == 2);
328 control_flag += 2;
329 }
330 charset_map_loaded = 1;
331 }
332
333 min_char = max_char = entries->entry[0].c;
334 nonascii_min_char = MAX_CHAR;
335 for (i = 0; i < n_entries; i++)
336 {
337 unsigned from, to;
338 int from_index, to_index;
339 int from_c, to_c;
340 int idx = i % 0x10000;
341
342 if (i > 0 && idx == 0)
343 entries = entries->next;
344 from = entries->entry[idx].from;
345 to = entries->entry[idx].to;
346 from_c = entries->entry[idx].c;
347 from_index = CODE_POINT_TO_INDEX (charset, from);
348 if (from == to)
349 {
350 to_index = from_index;
351 to_c = from_c;
352 }
353 else
354 {
355 to_index = CODE_POINT_TO_INDEX (charset, to);
356 to_c = from_c + (to_index - from_index);
357 }
358 if (from_index < 0 || to_index < 0)
359 continue;
360
361 if (to_c > max_char)
362 max_char = to_c;
363 else if (from_c < min_char)
364 min_char = from_c;
365
366 if (control_flag == 1)
367 {
368 if (charset->method == CHARSET_METHOD_MAP)
369 for (; from_index <= to_index; from_index++, from_c++)
370 ASET (vec, from_index, make_number (from_c));
371 else
372 for (; from_index <= to_index; from_index++, from_c++)
373 CHAR_TABLE_SET (Vchar_unify_table,
374 CHARSET_CODE_OFFSET (charset) + from_index,
375 make_number (from_c));
376 }
377 else if (control_flag == 2)
378 {
379 if (charset->method == CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset))
381 for (; from_index <= to_index; from_index++, from_c++)
382 {
383 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
384
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (code));
387 }
388 else
389 for (; from_index <= to_index; from_index++, from_c++)
390 {
391 if (NILP (CHAR_TABLE_REF (table, from_c)))
392 CHAR_TABLE_SET (table, from_c, make_number (from_index));
393 }
394 }
395 else if (control_flag == 3)
396 for (; from_index <= to_index; from_index++, from_c++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
398 else if (control_flag == 4)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
401 else /* control_flag == 0 */
402 {
403 if (ascii_compatible_p)
404 {
405 if (! ASCII_BYTE_P (from_c))
406 {
407 if (from_c < nonascii_min_char)
408 nonascii_min_char = from_c;
409 }
410 else if (! ASCII_BYTE_P (to_c))
411 {
412 nonascii_min_char = 0x80;
413 }
414 }
415
416 for (; from_c <= to_c; from_c++)
417 CHARSET_FAST_MAP_SET (from_c, fast_map);
418 }
419 }
420
421 if (control_flag == 0)
422 {
423 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
424 ? nonascii_min_char : min_char);
425 CHARSET_MAX_CHAR (charset) = max_char;
426 }
427 else if (control_flag == 4)
428 {
429 temp_charset_work->min_char = min_char;
430 temp_charset_work->max_char = max_char;
431 }
432 }
433
434
435 /* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
437
438 static INLINE unsigned
439 read_hex (FILE *fp, int *eof)
440 {
441 int c;
442 unsigned n;
443
444 while ((c = getc (fp)) != EOF)
445 {
446 if (c == '#')
447 {
448 while ((c = getc (fp)) != EOF && c != '\n');
449 }
450 else if (c == '0')
451 {
452 if ((c = getc (fp)) == EOF || c == 'x')
453 break;
454 }
455 }
456 if (c == EOF)
457 {
458 *eof = 1;
459 return 0;
460 }
461 *eof = 0;
462 n = 0;
463 if (c == 'x')
464 while ((c = getc (fp)) != EOF && isxdigit (c))
465 n = ((n << 4)
466 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
467 else
468 while ((c = getc (fp)) != EOF && isdigit (c))
469 n = (n * 10) + c - '0';
470 if (c != EOF)
471 ungetc (c, fp);
472 return n;
473 }
474
475 extern Lisp_Object Qfile_name_handler_alist;
476
477 /* Return a mapping vector for CHARSET loaded from MAPFILE.
478 Each line of MAPFILE has this form
479 0xAAAA 0xCCCC
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
482 0xAAAA-0xBBBB 0xCCCC
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
485
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
488 where CODE1 is a code-point or a cons of code-points specifying a
489 range.
490
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
493
494 extern void add_to_log (char *, Lisp_Object, Lisp_Object);
495
496 static void
497 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
498 {
499 unsigned min_code = CHARSET_MIN_CODE (charset);
500 unsigned max_code = CHARSET_MAX_CODE (charset);
501 int fd;
502 FILE *fp;
503 int eof;
504 Lisp_Object suffixes;
505 struct charset_map_entries *head, *entries;
506 int n_entries, count;
507 USE_SAFE_ALLOCA;
508
509 suffixes = Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil));
511
512 count = SPECPDL_INDEX ();
513 specbind (Qfile_name_handler_alist, Qnil);
514 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
515 unbind_to (count, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 error ("Failure in loading charset map: %S", SDATA (mapfile));
519
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head, struct charset_map_entries *,
523 sizeof (struct charset_map_entries));
524 entries = head;
525 memset (entries, 0, sizeof (struct charset_map_entries));
526
527 n_entries = 0;
528 eof = 0;
529 while (1)
530 {
531 unsigned from, to;
532 int c;
533 int idx;
534
535 from = read_hex (fp, &eof);
536 if (eof)
537 break;
538 if (getc (fp) == '-')
539 to = read_hex (fp, &eof);
540 else
541 to = from;
542 c = (int) read_hex (fp, &eof);
543
544 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
545 continue;
546
547 if (n_entries > 0 && (n_entries % 0x10000) == 0)
548 {
549 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
550 sizeof (struct charset_map_entries));
551 entries = entries->next;
552 memset (entries, 0, sizeof (struct charset_map_entries));
553 }
554 idx = n_entries % 0x10000;
555 entries->entry[idx].from = from;
556 entries->entry[idx].to = to;
557 entries->entry[idx].c = c;
558 n_entries++;
559 }
560 fclose (fp);
561
562 load_charset_map (charset, head, n_entries, control_flag);
563 SAFE_FREE ();
564 }
565
566 static void
567 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
568 {
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
575 USE_SAFE_ALLOCA;
576
577 if (len % 2 == 1)
578 {
579 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
580 return;
581 }
582
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head, struct charset_map_entries *,
586 sizeof (struct charset_map_entries));
587 entries = head;
588 memset (entries, 0, sizeof (struct charset_map_entries));
589
590 n_entries = 0;
591 for (i = 0; i < len; i += 2)
592 {
593 Lisp_Object val, val2;
594 unsigned from, to;
595 int c;
596 int idx;
597
598 val = AREF (vec, i);
599 if (CONSP (val))
600 {
601 val2 = XCDR (val);
602 val = XCAR (val);
603 CHECK_NATNUM (val);
604 CHECK_NATNUM (val2);
605 from = XFASTINT (val);
606 to = XFASTINT (val2);
607 }
608 else
609 {
610 CHECK_NATNUM (val);
611 from = to = XFASTINT (val);
612 }
613 val = AREF (vec, i + 1);
614 CHECK_NATNUM (val);
615 c = XFASTINT (val);
616
617 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
618 continue;
619
620 if (n_entries > 0 && (n_entries % 0x10000) == 0)
621 {
622 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
623 sizeof (struct charset_map_entries));
624 entries = entries->next;
625 memset (entries, 0, sizeof (struct charset_map_entries));
626 }
627 idx = n_entries % 0x10000;
628 entries->entry[idx].from = from;
629 entries->entry[idx].to = to;
630 entries->entry[idx].c = c;
631 n_entries++;
632 }
633
634 load_charset_map (charset, head, n_entries, control_flag);
635 SAFE_FREE ();
636 }
637
638
639 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
641
642 static void
643 load_charset (struct charset *charset, int control_flag)
644 {
645 Lisp_Object map;
646
647 if (inhibit_load_charset_map
648 && temp_charset_work
649 && charset == temp_charset_work->current
650 && ((control_flag == 2) == temp_charset_work->for_encoder))
651 return;
652
653 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
654 map = CHARSET_MAP (charset);
655 else if (CHARSET_UNIFIED_P (charset))
656 map = CHARSET_UNIFY_MAP (charset);
657 if (STRINGP (map))
658 load_charset_map_from_file (charset, map, control_flag);
659 else
660 load_charset_map_from_vector (charset, map, control_flag);
661 }
662
663
664 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
665 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
666 (Lisp_Object object)
667 {
668 return (CHARSETP (object) ? Qt : Qnil);
669 }
670
671
672 void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
673 Lisp_Object function, Lisp_Object arg,
674 unsigned from, unsigned to);
675
676 void
677 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
678 {
679 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
680 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
681 Lisp_Object range;
682 int c, stop;
683 struct gcpro gcpro1;
684
685 range = Fcons (Qnil, Qnil);
686 GCPRO1 (range);
687
688 c = temp_charset_work->min_char;
689 stop = (temp_charset_work->max_char < 0x20000
690 ? temp_charset_work->max_char : 0xFFFF);
691
692 while (1)
693 {
694 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
695
696 if (index >= from_idx && index <= to_idx)
697 {
698 if (NILP (XCAR (range)))
699 XSETCAR (range, make_number (c));
700 }
701 else if (! NILP (XCAR (range)))
702 {
703 XSETCDR (range, make_number (c - 1));
704 if (c_function)
705 (*c_function) (arg, range);
706 else
707 call2 (function, range, arg);
708 XSETCAR (range, Qnil);
709 }
710 if (c == stop)
711 {
712 if (c == temp_charset_work->max_char)
713 {
714 if (! NILP (XCAR (range)))
715 {
716 XSETCDR (range, make_number (c));
717 if (c_function)
718 (*c_function) (arg, range);
719 else
720 call2 (function, range, arg);
721 }
722 break;
723 }
724 c = 0x1FFFF;
725 stop = temp_charset_work->max_char;
726 }
727 c++;
728 }
729 UNGCPRO;
730 }
731
732 void
733 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
734 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
735 {
736 Lisp_Object range;
737 int partial;
738
739 partial = (from > CHARSET_MIN_CODE (charset)
740 || to < CHARSET_MAX_CODE (charset));
741
742 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
743 {
744 int from_idx = CODE_POINT_TO_INDEX (charset, from);
745 int to_idx = CODE_POINT_TO_INDEX (charset, to);
746 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
747 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
748
749 if (CHARSET_UNIFIED_P (charset))
750 {
751 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
752 load_charset (charset, 2);
753 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
754 map_char_table_for_charset (c_function, function,
755 CHARSET_DEUNIFIER (charset), arg,
756 partial ? charset : NULL, from, to);
757 else
758 map_charset_for_dump (c_function, function, arg, from, to);
759 }
760
761 range = Fcons (make_number (from_c), make_number (to_c));
762 if (NILP (function))
763 (*c_function) (arg, range);
764 else
765 call2 (function, range, arg);
766 }
767 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
768 {
769 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
770 load_charset (charset, 2);
771 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
772 map_char_table_for_charset (c_function, function,
773 CHARSET_ENCODER (charset), arg,
774 partial ? charset : NULL, from, to);
775 else
776 map_charset_for_dump (c_function, function, arg, from, to);
777 }
778 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
779 {
780 Lisp_Object subset_info;
781 int offset;
782
783 subset_info = CHARSET_SUBSET (charset);
784 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
785 offset = XINT (AREF (subset_info, 3));
786 from -= offset;
787 if (from < XFASTINT (AREF (subset_info, 1)))
788 from = XFASTINT (AREF (subset_info, 1));
789 to -= offset;
790 if (to > XFASTINT (AREF (subset_info, 2)))
791 to = XFASTINT (AREF (subset_info, 2));
792 map_charset_chars (c_function, function, arg, charset, from, to);
793 }
794 else /* i.e. CHARSET_METHOD_SUPERSET */
795 {
796 Lisp_Object parents;
797
798 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
799 parents = XCDR (parents))
800 {
801 int offset;
802 unsigned this_from, this_to;
803
804 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
805 offset = XINT (XCDR (XCAR (parents)));
806 this_from = from > offset ? from - offset : 0;
807 this_to = to > offset ? to - offset : 0;
808 if (this_from < CHARSET_MIN_CODE (charset))
809 this_from = CHARSET_MIN_CODE (charset);
810 if (this_to > CHARSET_MAX_CODE (charset))
811 this_to = CHARSET_MAX_CODE (charset);
812 map_charset_chars (c_function, function, arg, charset,
813 this_from, this_to);
814 }
815 }
816 }
817
818 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
819 doc: /* Call FUNCTION for all characters in CHARSET.
820 FUNCTION is called with an argument RANGE and the optional 3rd
821 argument ARG.
822
823 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
824 characters contained in CHARSET.
825
826 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
827 range of code points (in CHARSET) of target characters. */)
828 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
829 {
830 struct charset *cs;
831 unsigned from, to;
832
833 CHECK_CHARSET_GET_CHARSET (charset, cs);
834 if (NILP (from_code))
835 from = CHARSET_MIN_CODE (cs);
836 else
837 {
838 CHECK_NATNUM (from_code);
839 from = XINT (from_code);
840 if (from < CHARSET_MIN_CODE (cs))
841 from = CHARSET_MIN_CODE (cs);
842 }
843 if (NILP (to_code))
844 to = CHARSET_MAX_CODE (cs);
845 else
846 {
847 CHECK_NATNUM (to_code);
848 to = XINT (to_code);
849 if (to > CHARSET_MAX_CODE (cs))
850 to = CHARSET_MAX_CODE (cs);
851 }
852 map_charset_chars (NULL, function, arg, cs, from, to);
853 return Qnil;
854 }
855
856
857 /* Define a charset according to the arguments. The Nth argument is
858 the Nth attribute of the charset (the last attribute `charset-id'
859 is not included). See the docstring of `define-charset' for the
860 detail. */
861
862 DEFUN ("define-charset-internal", Fdefine_charset_internal,
863 Sdefine_charset_internal, charset_arg_max, MANY, 0,
864 doc: /* For internal use only.
865 usage: (define-charset-internal ...) */)
866 (int nargs, Lisp_Object *args)
867 {
868 /* Charset attr vector. */
869 Lisp_Object attrs;
870 Lisp_Object val;
871 unsigned hash_code;
872 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
873 int i, j;
874 struct charset charset;
875 int id;
876 int dimension;
877 int new_definition_p;
878 int nchars;
879
880 if (nargs != charset_arg_max)
881 return Fsignal (Qwrong_number_of_arguments,
882 Fcons (intern ("define-charset-internal"),
883 make_number (nargs)));
884
885 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
886
887 CHECK_SYMBOL (args[charset_arg_name]);
888 ASET (attrs, charset_name, args[charset_arg_name]);
889
890 val = args[charset_arg_code_space];
891 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
892 {
893 int min_byte, max_byte;
894
895 min_byte = XINT (Faref (val, make_number (i * 2)));
896 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
897 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
898 error ("Invalid :code-space value");
899 charset.code_space[i * 4] = min_byte;
900 charset.code_space[i * 4 + 1] = max_byte;
901 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
902 nchars *= charset.code_space[i * 4 + 2];
903 charset.code_space[i * 4 + 3] = nchars;
904 if (max_byte > 0)
905 dimension = i + 1;
906 }
907
908 val = args[charset_arg_dimension];
909 if (NILP (val))
910 charset.dimension = dimension;
911 else
912 {
913 CHECK_NATNUM (val);
914 charset.dimension = XINT (val);
915 if (charset.dimension < 1 || charset.dimension > 4)
916 args_out_of_range_3 (val, make_number (1), make_number (4));
917 }
918
919 charset.code_linear_p
920 = (charset.dimension == 1
921 || (charset.code_space[2] == 256
922 && (charset.dimension == 2
923 || (charset.code_space[6] == 256
924 && (charset.dimension == 3
925 || charset.code_space[10] == 256)))));
926
927 if (! charset.code_linear_p)
928 {
929 charset.code_space_mask = (unsigned char *) xmalloc (256);
930 memset (charset.code_space_mask, 0, 256);
931 for (i = 0; i < 4; i++)
932 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
933 j++)
934 charset.code_space_mask[j] |= (1 << i);
935 }
936
937 charset.iso_chars_96 = charset.code_space[2] == 96;
938
939 charset.min_code = (charset.code_space[0]
940 | (charset.code_space[4] << 8)
941 | (charset.code_space[8] << 16)
942 | (charset.code_space[12] << 24));
943 charset.max_code = (charset.code_space[1]
944 | (charset.code_space[5] << 8)
945 | (charset.code_space[9] << 16)
946 | (charset.code_space[13] << 24));
947 charset.char_index_offset = 0;
948
949 val = args[charset_arg_min_code];
950 if (! NILP (val))
951 {
952 unsigned code;
953
954 if (INTEGERP (val))
955 code = XINT (val);
956 else
957 {
958 CHECK_CONS (val);
959 CHECK_NUMBER_CAR (val);
960 CHECK_NUMBER_CDR (val);
961 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
962 }
963 if (code < charset.min_code
964 || code > charset.max_code)
965 args_out_of_range_3 (make_number (charset.min_code),
966 make_number (charset.max_code), val);
967 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
968 charset.min_code = code;
969 }
970
971 val = args[charset_arg_max_code];
972 if (! NILP (val))
973 {
974 unsigned code;
975
976 if (INTEGERP (val))
977 code = XINT (val);
978 else
979 {
980 CHECK_CONS (val);
981 CHECK_NUMBER_CAR (val);
982 CHECK_NUMBER_CDR (val);
983 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
984 }
985 if (code < charset.min_code
986 || code > charset.max_code)
987 args_out_of_range_3 (make_number (charset.min_code),
988 make_number (charset.max_code), val);
989 charset.max_code = code;
990 }
991
992 charset.compact_codes_p = charset.max_code < 0x10000;
993
994 val = args[charset_arg_invalid_code];
995 if (NILP (val))
996 {
997 if (charset.min_code > 0)
998 charset.invalid_code = 0;
999 else
1000 {
1001 XSETINT (val, charset.max_code + 1);
1002 if (XINT (val) == charset.max_code + 1)
1003 charset.invalid_code = charset.max_code + 1;
1004 else
1005 error ("Attribute :invalid-code must be specified");
1006 }
1007 }
1008 else
1009 {
1010 CHECK_NATNUM (val);
1011 charset.invalid_code = XFASTINT (val);
1012 }
1013
1014 val = args[charset_arg_iso_final];
1015 if (NILP (val))
1016 charset.iso_final = -1;
1017 else
1018 {
1019 CHECK_NUMBER (val);
1020 if (XINT (val) < '0' || XINT (val) > 127)
1021 error ("Invalid iso-final-char: %d", XINT (val));
1022 charset.iso_final = XINT (val);
1023 }
1024
1025 val = args[charset_arg_iso_revision];
1026 if (NILP (val))
1027 charset.iso_revision = -1;
1028 else
1029 {
1030 CHECK_NUMBER (val);
1031 if (XINT (val) > 63)
1032 args_out_of_range (make_number (63), val);
1033 charset.iso_revision = XINT (val);
1034 }
1035
1036 val = args[charset_arg_emacs_mule_id];
1037 if (NILP (val))
1038 charset.emacs_mule_id = -1;
1039 else
1040 {
1041 CHECK_NATNUM (val);
1042 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1043 error ("Invalid emacs-mule-id: %d", XINT (val));
1044 charset.emacs_mule_id = XINT (val);
1045 }
1046
1047 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1048
1049 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1050
1051 charset.unified_p = 0;
1052
1053 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1054
1055 if (! NILP (args[charset_arg_code_offset]))
1056 {
1057 val = args[charset_arg_code_offset];
1058 CHECK_NUMBER (val);
1059
1060 charset.method = CHARSET_METHOD_OFFSET;
1061 charset.code_offset = XINT (val);
1062
1063 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1064 charset.min_char = i + charset.code_offset;
1065 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1066 charset.max_char = i + charset.code_offset;
1067 if (charset.max_char > MAX_CHAR)
1068 error ("Unsupported max char: %d", charset.max_char);
1069
1070 i = (charset.min_char >> 7) << 7;
1071 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1072 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1073 i = (i >> 12) << 12;
1074 for (; i <= charset.max_char; i += 0x1000)
1075 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1076 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1077 charset.ascii_compatible_p = 1;
1078 }
1079 else if (! NILP (args[charset_arg_map]))
1080 {
1081 val = args[charset_arg_map];
1082 ASET (attrs, charset_map, val);
1083 charset.method = CHARSET_METHOD_MAP;
1084 }
1085 else if (! NILP (args[charset_arg_subset]))
1086 {
1087 Lisp_Object parent;
1088 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1089 struct charset *parent_charset;
1090
1091 val = args[charset_arg_subset];
1092 parent = Fcar (val);
1093 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1094 parent_min_code = Fnth (make_number (1), val);
1095 CHECK_NATNUM (parent_min_code);
1096 parent_max_code = Fnth (make_number (2), val);
1097 CHECK_NATNUM (parent_max_code);
1098 parent_code_offset = Fnth (make_number (3), val);
1099 CHECK_NUMBER (parent_code_offset);
1100 val = Fmake_vector (make_number (4), Qnil);
1101 ASET (val, 0, make_number (parent_charset->id));
1102 ASET (val, 1, parent_min_code);
1103 ASET (val, 2, parent_max_code);
1104 ASET (val, 3, parent_code_offset);
1105 ASET (attrs, charset_subset, val);
1106
1107 charset.method = CHARSET_METHOD_SUBSET;
1108 /* Here, we just copy the parent's fast_map. It's not accurate,
1109 but at least it works for quickly detecting which character
1110 DOESN'T belong to this charset. */
1111 for (i = 0; i < 190; i++)
1112 charset.fast_map[i] = parent_charset->fast_map[i];
1113
1114 /* We also copy these for parents. */
1115 charset.min_char = parent_charset->min_char;
1116 charset.max_char = parent_charset->max_char;
1117 }
1118 else if (! NILP (args[charset_arg_superset]))
1119 {
1120 val = args[charset_arg_superset];
1121 charset.method = CHARSET_METHOD_SUPERSET;
1122 val = Fcopy_sequence (val);
1123 ASET (attrs, charset_superset, val);
1124
1125 charset.min_char = MAX_CHAR;
1126 charset.max_char = 0;
1127 for (; ! NILP (val); val = Fcdr (val))
1128 {
1129 Lisp_Object elt, car_part, cdr_part;
1130 int this_id, offset;
1131 struct charset *this_charset;
1132
1133 elt = Fcar (val);
1134 if (CONSP (elt))
1135 {
1136 car_part = XCAR (elt);
1137 cdr_part = XCDR (elt);
1138 CHECK_CHARSET_GET_ID (car_part, this_id);
1139 CHECK_NUMBER (cdr_part);
1140 offset = XINT (cdr_part);
1141 }
1142 else
1143 {
1144 CHECK_CHARSET_GET_ID (elt, this_id);
1145 offset = 0;
1146 }
1147 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1148
1149 this_charset = CHARSET_FROM_ID (this_id);
1150 if (charset.min_char > this_charset->min_char)
1151 charset.min_char = this_charset->min_char;
1152 if (charset.max_char < this_charset->max_char)
1153 charset.max_char = this_charset->max_char;
1154 for (i = 0; i < 190; i++)
1155 charset.fast_map[i] |= this_charset->fast_map[i];
1156 }
1157 }
1158 else
1159 error ("None of :code-offset, :map, :parents are specified");
1160
1161 val = args[charset_arg_unify_map];
1162 if (! NILP (val) && !STRINGP (val))
1163 CHECK_VECTOR (val);
1164 ASET (attrs, charset_unify_map, val);
1165
1166 CHECK_LIST (args[charset_arg_plist]);
1167 ASET (attrs, charset_plist, args[charset_arg_plist]);
1168
1169 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1170 &hash_code);
1171 if (charset.hash_index >= 0)
1172 {
1173 new_definition_p = 0;
1174 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1175 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1176 }
1177 else
1178 {
1179 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1180 hash_code);
1181 if (charset_table_used == charset_table_size)
1182 {
1183 struct charset *new_table
1184 = (struct charset *) xmalloc (sizeof (struct charset)
1185 * (charset_table_size + 16));
1186 memcpy (new_table, charset_table,
1187 sizeof (struct charset) * charset_table_size);
1188 charset_table_size += 16;
1189 charset_table = new_table;
1190 }
1191 id = charset_table_used++;
1192 new_definition_p = 1;
1193 }
1194
1195 ASET (attrs, charset_id, make_number (id));
1196 charset.id = id;
1197 charset_table[id] = charset;
1198
1199 if (charset.method == CHARSET_METHOD_MAP)
1200 {
1201 load_charset (&charset, 0);
1202 charset_table[id] = charset;
1203 }
1204
1205 if (charset.iso_final >= 0)
1206 {
1207 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1208 charset.iso_final) = id;
1209 if (new_definition_p)
1210 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1211 Fcons (make_number (id), Qnil));
1212 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1213 charset_jisx0201_roman = id;
1214 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1215 charset_jisx0208_1978 = id;
1216 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1217 charset_jisx0208 = id;
1218 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1219 charset_ksc5601 = id;
1220 }
1221
1222 if (charset.emacs_mule_id >= 0)
1223 {
1224 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1225 if (charset.emacs_mule_id < 0xA0)
1226 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1227 else
1228 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1229 if (new_definition_p)
1230 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1231 Fcons (make_number (id), Qnil));
1232 }
1233
1234 if (new_definition_p)
1235 {
1236 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1237 if (charset.supplementary_p)
1238 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1239 Fcons (make_number (id), Qnil));
1240 else
1241 {
1242 Lisp_Object tail;
1243
1244 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1245 {
1246 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1247
1248 if (cs->supplementary_p)
1249 break;
1250 }
1251 if (EQ (tail, Vcharset_ordered_list))
1252 Vcharset_ordered_list = Fcons (make_number (id),
1253 Vcharset_ordered_list);
1254 else if (NILP (tail))
1255 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1256 Fcons (make_number (id), Qnil));
1257 else
1258 {
1259 val = Fcons (XCAR (tail), XCDR (tail));
1260 XSETCDR (tail, val);
1261 XSETCAR (tail, make_number (id));
1262 }
1263 }
1264 charset_ordered_list_tick++;
1265 }
1266
1267 return Qnil;
1268 }
1269
1270
1271 /* Same as Fdefine_charset_internal but arguments are more convenient
1272 to call from C (typically in syms_of_charset). This can define a
1273 charset of `offset' method only. Return the ID of the new
1274 charset. */
1275
1276 static int
1277 define_charset_internal (Lisp_Object name,
1278 int dimension,
1279 unsigned char *code_space,
1280 unsigned min_code, unsigned max_code,
1281 int iso_final, int iso_revision, int emacs_mule_id,
1282 int ascii_compatible, int supplementary,
1283 int code_offset)
1284 {
1285 Lisp_Object args[charset_arg_max];
1286 Lisp_Object plist[14];
1287 Lisp_Object val;
1288 int i;
1289
1290 args[charset_arg_name] = name;
1291 args[charset_arg_dimension] = make_number (dimension);
1292 val = Fmake_vector (make_number (8), make_number (0));
1293 for (i = 0; i < 8; i++)
1294 ASET (val, i, make_number (code_space[i]));
1295 args[charset_arg_code_space] = val;
1296 args[charset_arg_min_code] = make_number (min_code);
1297 args[charset_arg_max_code] = make_number (max_code);
1298 args[charset_arg_iso_final]
1299 = (iso_final < 0 ? Qnil : make_number (iso_final));
1300 args[charset_arg_iso_revision] = make_number (iso_revision);
1301 args[charset_arg_emacs_mule_id]
1302 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1303 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1304 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1305 args[charset_arg_invalid_code] = Qnil;
1306 args[charset_arg_code_offset] = make_number (code_offset);
1307 args[charset_arg_map] = Qnil;
1308 args[charset_arg_subset] = Qnil;
1309 args[charset_arg_superset] = Qnil;
1310 args[charset_arg_unify_map] = Qnil;
1311
1312 plist[0] = intern_c_string (":name");
1313 plist[1] = args[charset_arg_name];
1314 plist[2] = intern_c_string (":dimension");
1315 plist[3] = args[charset_arg_dimension];
1316 plist[4] = intern_c_string (":code-space");
1317 plist[5] = args[charset_arg_code_space];
1318 plist[6] = intern_c_string (":iso-final-char");
1319 plist[7] = args[charset_arg_iso_final];
1320 plist[8] = intern_c_string (":emacs-mule-id");
1321 plist[9] = args[charset_arg_emacs_mule_id];
1322 plist[10] = intern_c_string (":ascii-compatible-p");
1323 plist[11] = args[charset_arg_ascii_compatible_p];
1324 plist[12] = intern_c_string (":code-offset");
1325 plist[13] = args[charset_arg_code_offset];
1326
1327 args[charset_arg_plist] = Flist (14, plist);
1328 Fdefine_charset_internal (charset_arg_max, args);
1329
1330 return XINT (CHARSET_SYMBOL_ID (name));
1331 }
1332
1333
1334 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1335 Sdefine_charset_alias, 2, 2, 0,
1336 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1337 (Lisp_Object alias, Lisp_Object charset)
1338 {
1339 Lisp_Object attr;
1340
1341 CHECK_CHARSET_GET_ATTR (charset, attr);
1342 Fputhash (alias, attr, Vcharset_hash_table);
1343 Vcharset_list = Fcons (alias, Vcharset_list);
1344 return Qnil;
1345 }
1346
1347
1348 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1349 doc: /* Return the property list of CHARSET. */)
1350 (Lisp_Object charset)
1351 {
1352 Lisp_Object attrs;
1353
1354 CHECK_CHARSET_GET_ATTR (charset, attrs);
1355 return CHARSET_ATTR_PLIST (attrs);
1356 }
1357
1358
1359 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1360 doc: /* Set CHARSET's property list to PLIST. */)
1361 (Lisp_Object charset, Lisp_Object plist)
1362 {
1363 Lisp_Object attrs;
1364
1365 CHECK_CHARSET_GET_ATTR (charset, attrs);
1366 CHARSET_ATTR_PLIST (attrs) = plist;
1367 return plist;
1368 }
1369
1370
1371 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1372 doc: /* Unify characters of CHARSET with Unicode.
1373 This means reading the relevant file and installing the table defined
1374 by CHARSET's `:unify-map' property.
1375
1376 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1377 the same meaning as the `:unify-map' attribute in the function
1378 `define-charset' (which see).
1379
1380 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1381 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1382 {
1383 int id;
1384 struct charset *cs;
1385
1386 CHECK_CHARSET_GET_ID (charset, id);
1387 cs = CHARSET_FROM_ID (id);
1388 if (NILP (deunify)
1389 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1390 : ! CHARSET_UNIFIED_P (cs))
1391 return Qnil;
1392
1393 CHARSET_UNIFIED_P (cs) = 0;
1394 if (NILP (deunify))
1395 {
1396 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1397 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1398 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1399 if (NILP (unify_map))
1400 unify_map = CHARSET_UNIFY_MAP (cs);
1401 else
1402 {
1403 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1404 signal_error ("Bad unify-map", unify_map);
1405 CHARSET_UNIFY_MAP (cs) = unify_map;
1406 }
1407 if (NILP (Vchar_unify_table))
1408 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1409 char_table_set_range (Vchar_unify_table,
1410 cs->min_char, cs->max_char, charset);
1411 CHARSET_UNIFIED_P (cs) = 1;
1412 }
1413 else if (CHAR_TABLE_P (Vchar_unify_table))
1414 {
1415 int min_code = CHARSET_MIN_CODE (cs);
1416 int max_code = CHARSET_MAX_CODE (cs);
1417 int min_char = DECODE_CHAR (cs, min_code);
1418 int max_char = DECODE_CHAR (cs, max_code);
1419
1420 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1421 }
1422
1423 return Qnil;
1424 }
1425
1426 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1427 Sget_unused_iso_final_char, 2, 2, 0,
1428 doc: /*
1429 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1430 DIMENSION is the number of bytes to represent a character: 1 or 2.
1431 CHARS is the number of characters in a dimension: 94 or 96.
1432
1433 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1434 If there's no unused final char for the specified kind of charset,
1435 return nil. */)
1436 (Lisp_Object dimension, Lisp_Object chars)
1437 {
1438 int final_char;
1439
1440 CHECK_NUMBER (dimension);
1441 CHECK_NUMBER (chars);
1442 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1443 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1444 if (XINT (chars) != 94 && XINT (chars) != 96)
1445 args_out_of_range_3 (chars, make_number (94), make_number (96));
1446 for (final_char = '0'; final_char <= '?'; final_char++)
1447 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1448 break;
1449 return (final_char <= '?' ? make_number (final_char) : Qnil);
1450 }
1451
1452 static void
1453 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1454 {
1455 CHECK_NATNUM (dimension);
1456 CHECK_NATNUM (chars);
1457 CHECK_NATNUM (final_char);
1458
1459 if (XINT (dimension) > 3)
1460 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1461 if (XINT (chars) != 94 && XINT (chars) != 96)
1462 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1463 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1464 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1465 }
1466
1467
1468 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1469 4, 4, 0,
1470 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1471
1472 On decoding by an ISO-2022 base coding system, when a charset
1473 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1474 if CHARSET is designated instead. */)
1475 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1476 {
1477 int id;
1478 int chars_flag;
1479
1480 CHECK_CHARSET_GET_ID (charset, id);
1481 check_iso_charset_parameter (dimension, chars, final_char);
1482 chars_flag = XINT (chars) == 96;
1483 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1484 return Qnil;
1485 }
1486
1487
1488 /* Return information about charsets in the text at PTR of NBYTES
1489 bytes, which are NCHARS characters. The value is:
1490
1491 0: Each character is represented by one byte. This is always
1492 true for a unibyte string. For a multibyte string, true if
1493 it contains only ASCII characters.
1494
1495 1: No charsets other than ascii, control-1, and latin-1 are
1496 found.
1497
1498 2: Otherwise.
1499 */
1500
1501 int
1502 string_xstring_p (Lisp_Object string)
1503 {
1504 const unsigned char *p = SDATA (string);
1505 const unsigned char *endp = p + SBYTES (string);
1506
1507 if (SCHARS (string) == SBYTES (string))
1508 return 0;
1509
1510 while (p < endp)
1511 {
1512 int c = STRING_CHAR_ADVANCE (p);
1513
1514 if (c >= 0x100)
1515 return 2;
1516 }
1517 return 1;
1518 }
1519
1520
1521 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1522
1523 CHARSETS is a vector. If Nth element is non-nil, it means the
1524 charset whose id is N is already found.
1525
1526 It may lookup a translation table TABLE if supplied. */
1527
1528 static void
1529 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1530 {
1531 const unsigned char *pend = ptr + nbytes;
1532
1533 if (nchars == nbytes)
1534 {
1535 if (multibyte)
1536 ASET (charsets, charset_ascii, Qt);
1537 else
1538 while (ptr < pend)
1539 {
1540 int c = *ptr++;
1541
1542 if (!NILP (table))
1543 c = translate_char (table, c);
1544 if (ASCII_BYTE_P (c))
1545 ASET (charsets, charset_ascii, Qt);
1546 else
1547 ASET (charsets, charset_eight_bit, Qt);
1548 }
1549 }
1550 else
1551 {
1552 while (ptr < pend)
1553 {
1554 int c = STRING_CHAR_ADVANCE (ptr);
1555 struct charset *charset;
1556
1557 if (!NILP (table))
1558 c = translate_char (table, c);
1559 charset = CHAR_CHARSET (c);
1560 ASET (charsets, CHARSET_ID (charset), Qt);
1561 }
1562 }
1563 }
1564
1565 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1566 2, 3, 0,
1567 doc: /* Return a list of charsets in the region between BEG and END.
1568 BEG and END are buffer positions.
1569 Optional arg TABLE if non-nil is a translation table to look up.
1570
1571 If the current buffer is unibyte, the returned list may contain
1572 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1573 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1574 {
1575 Lisp_Object charsets;
1576 EMACS_INT from, from_byte, to, stop, stop_byte;
1577 int i;
1578 Lisp_Object val;
1579 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1580
1581 validate_region (&beg, &end);
1582 from = XFASTINT (beg);
1583 stop = to = XFASTINT (end);
1584
1585 if (from < GPT && GPT < to)
1586 {
1587 stop = GPT;
1588 stop_byte = GPT_BYTE;
1589 }
1590 else
1591 stop_byte = CHAR_TO_BYTE (stop);
1592
1593 from_byte = CHAR_TO_BYTE (from);
1594
1595 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1596 while (1)
1597 {
1598 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1599 stop_byte - from_byte, charsets, table,
1600 multibyte);
1601 if (stop < to)
1602 {
1603 from = stop, from_byte = stop_byte;
1604 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1605 }
1606 else
1607 break;
1608 }
1609
1610 val = Qnil;
1611 for (i = charset_table_used - 1; i >= 0; i--)
1612 if (!NILP (AREF (charsets, i)))
1613 val = Fcons (CHARSET_NAME (charset_table + i), val);
1614 return val;
1615 }
1616
1617 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1618 1, 2, 0,
1619 doc: /* Return a list of charsets in STR.
1620 Optional arg TABLE if non-nil is a translation table to look up.
1621
1622 If STR is unibyte, the returned list may contain
1623 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1624 (Lisp_Object str, Lisp_Object table)
1625 {
1626 Lisp_Object charsets;
1627 int i;
1628 Lisp_Object val;
1629
1630 CHECK_STRING (str);
1631
1632 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1633 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1634 charsets, table,
1635 STRING_MULTIBYTE (str));
1636 val = Qnil;
1637 for (i = charset_table_used - 1; i >= 0; i--)
1638 if (!NILP (AREF (charsets, i)))
1639 val = Fcons (CHARSET_NAME (charset_table + i), val);
1640 return val;
1641 }
1642
1643 \f
1644
1645 /* Return a unified character code for C (>= 0x110000). VAL is a
1646 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1647 charset symbol. */
1648 int
1649 maybe_unify_char (int c, Lisp_Object val)
1650 {
1651 struct charset *charset;
1652
1653 if (INTEGERP (val))
1654 return XINT (val);
1655 if (NILP (val))
1656 return c;
1657
1658 CHECK_CHARSET_GET_CHARSET (val, charset);
1659 load_charset (charset, 1);
1660 if (! inhibit_load_charset_map)
1661 {
1662 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1663 if (! NILP (val))
1664 c = XINT (val);
1665 }
1666 else
1667 {
1668 int code_index = c - CHARSET_CODE_OFFSET (charset);
1669 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1670
1671 if (unified > 0)
1672 c = unified;
1673 }
1674 return c;
1675 }
1676
1677
1678 /* Return a character correponding to the code-point CODE of
1679 CHARSET. */
1680
1681 int
1682 decode_char (struct charset *charset, unsigned int code)
1683 {
1684 int c, char_index;
1685 enum charset_method method = CHARSET_METHOD (charset);
1686
1687 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1688 return -1;
1689
1690 if (method == CHARSET_METHOD_SUBSET)
1691 {
1692 Lisp_Object subset_info;
1693
1694 subset_info = CHARSET_SUBSET (charset);
1695 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1696 code -= XINT (AREF (subset_info, 3));
1697 if (code < XFASTINT (AREF (subset_info, 1))
1698 || code > XFASTINT (AREF (subset_info, 2)))
1699 c = -1;
1700 else
1701 c = DECODE_CHAR (charset, code);
1702 }
1703 else if (method == CHARSET_METHOD_SUPERSET)
1704 {
1705 Lisp_Object parents;
1706
1707 parents = CHARSET_SUPERSET (charset);
1708 c = -1;
1709 for (; CONSP (parents); parents = XCDR (parents))
1710 {
1711 int id = XINT (XCAR (XCAR (parents)));
1712 int code_offset = XINT (XCDR (XCAR (parents)));
1713 unsigned this_code = code - code_offset;
1714
1715 charset = CHARSET_FROM_ID (id);
1716 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1717 break;
1718 }
1719 }
1720 else
1721 {
1722 char_index = CODE_POINT_TO_INDEX (charset, code);
1723 if (char_index < 0)
1724 return -1;
1725
1726 if (method == CHARSET_METHOD_MAP)
1727 {
1728 Lisp_Object decoder;
1729
1730 decoder = CHARSET_DECODER (charset);
1731 if (! VECTORP (decoder))
1732 {
1733 load_charset (charset, 1);
1734 decoder = CHARSET_DECODER (charset);
1735 }
1736 if (VECTORP (decoder))
1737 c = XINT (AREF (decoder, char_index));
1738 else
1739 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1740 }
1741 else /* method == CHARSET_METHOD_OFFSET */
1742 {
1743 c = char_index + CHARSET_CODE_OFFSET (charset);
1744 if (CHARSET_UNIFIED_P (charset)
1745 && c > MAX_UNICODE_CHAR)
1746 MAYBE_UNIFY_CHAR (c);
1747 }
1748 }
1749
1750 return c;
1751 }
1752
1753 /* Variable used temporarily by the macro ENCODE_CHAR. */
1754 Lisp_Object charset_work;
1755
1756 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1757 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1758 use CHARSET's strict_max_char instead of max_char. */
1759
1760 unsigned
1761 encode_char (struct charset *charset, int c)
1762 {
1763 unsigned code;
1764 enum charset_method method = CHARSET_METHOD (charset);
1765
1766 if (CHARSET_UNIFIED_P (charset))
1767 {
1768 Lisp_Object deunifier;
1769 int code_index = -1;
1770
1771 deunifier = CHARSET_DEUNIFIER (charset);
1772 if (! CHAR_TABLE_P (deunifier))
1773 {
1774 load_charset (charset, 2);
1775 deunifier = CHARSET_DEUNIFIER (charset);
1776 }
1777 if (CHAR_TABLE_P (deunifier))
1778 {
1779 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1780
1781 if (INTEGERP (deunified))
1782 code_index = XINT (deunified);
1783 }
1784 else
1785 {
1786 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1787 }
1788 if (code_index >= 0)
1789 c = CHARSET_CODE_OFFSET (charset) + code_index;
1790 }
1791
1792 if (method == CHARSET_METHOD_SUBSET)
1793 {
1794 Lisp_Object subset_info;
1795 struct charset *this_charset;
1796
1797 subset_info = CHARSET_SUBSET (charset);
1798 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1799 code = ENCODE_CHAR (this_charset, c);
1800 if (code == CHARSET_INVALID_CODE (this_charset)
1801 || code < XFASTINT (AREF (subset_info, 1))
1802 || code > XFASTINT (AREF (subset_info, 2)))
1803 return CHARSET_INVALID_CODE (charset);
1804 code += XINT (AREF (subset_info, 3));
1805 return code;
1806 }
1807
1808 if (method == CHARSET_METHOD_SUPERSET)
1809 {
1810 Lisp_Object parents;
1811
1812 parents = CHARSET_SUPERSET (charset);
1813 for (; CONSP (parents); parents = XCDR (parents))
1814 {
1815 int id = XINT (XCAR (XCAR (parents)));
1816 int code_offset = XINT (XCDR (XCAR (parents)));
1817 struct charset *this_charset = CHARSET_FROM_ID (id);
1818
1819 code = ENCODE_CHAR (this_charset, c);
1820 if (code != CHARSET_INVALID_CODE (this_charset))
1821 return code + code_offset;
1822 }
1823 return CHARSET_INVALID_CODE (charset);
1824 }
1825
1826 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1827 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1828 return CHARSET_INVALID_CODE (charset);
1829
1830 if (method == CHARSET_METHOD_MAP)
1831 {
1832 Lisp_Object encoder;
1833 Lisp_Object val;
1834
1835 encoder = CHARSET_ENCODER (charset);
1836 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1837 {
1838 load_charset (charset, 2);
1839 encoder = CHARSET_ENCODER (charset);
1840 }
1841 if (CHAR_TABLE_P (encoder))
1842 {
1843 val = CHAR_TABLE_REF (encoder, c);
1844 if (NILP (val))
1845 return CHARSET_INVALID_CODE (charset);
1846 code = XINT (val);
1847 if (! CHARSET_COMPACT_CODES_P (charset))
1848 code = INDEX_TO_CODE_POINT (charset, code);
1849 }
1850 else
1851 {
1852 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1853 code = INDEX_TO_CODE_POINT (charset, code);
1854 }
1855 }
1856 else /* method == CHARSET_METHOD_OFFSET */
1857 {
1858 int code_index = c - CHARSET_CODE_OFFSET (charset);
1859
1860 code = INDEX_TO_CODE_POINT (charset, code_index);
1861 }
1862
1863 return code;
1864 }
1865
1866
1867 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1868 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1869 Return nil if CODE-POINT is not valid in CHARSET.
1870
1871 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1872
1873 Optional argument RESTRICTION specifies a way to map the pair of CCS
1874 and CODE-POINT to a character. Currently not supported and just ignored. */)
1875 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1876 {
1877 int c, id;
1878 unsigned code;
1879 struct charset *charsetp;
1880
1881 CHECK_CHARSET_GET_ID (charset, id);
1882 if (CONSP (code_point))
1883 {
1884 CHECK_NATNUM_CAR (code_point);
1885 CHECK_NATNUM_CDR (code_point);
1886 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1887 }
1888 else
1889 {
1890 CHECK_NATNUM (code_point);
1891 code = XINT (code_point);
1892 }
1893 charsetp = CHARSET_FROM_ID (id);
1894 c = DECODE_CHAR (charsetp, code);
1895 return (c >= 0 ? make_number (c) : Qnil);
1896 }
1897
1898
1899 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1900 doc: /* Encode the character CH into a code-point of CHARSET.
1901 Return nil if CHARSET doesn't include CH.
1902
1903 Optional argument RESTRICTION specifies a way to map CH to a
1904 code-point in CCS. Currently not supported and just ignored. */)
1905 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1906 {
1907 int id;
1908 unsigned code;
1909 struct charset *charsetp;
1910
1911 CHECK_CHARSET_GET_ID (charset, id);
1912 CHECK_NATNUM (ch);
1913 charsetp = CHARSET_FROM_ID (id);
1914 code = ENCODE_CHAR (charsetp, XINT (ch));
1915 if (code == CHARSET_INVALID_CODE (charsetp))
1916 return Qnil;
1917 if (code > 0x7FFFFFF)
1918 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1919 return make_number (code);
1920 }
1921
1922
1923 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1924 doc:
1925 /* Return a character of CHARSET whose position codes are CODEn.
1926
1927 CODE1 through CODE4 are optional, but if you don't supply sufficient
1928 position codes, it is assumed that the minimum code in each dimension
1929 is specified. */)
1930 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1931 {
1932 int id, dimension;
1933 struct charset *charsetp;
1934 unsigned code;
1935 int c;
1936
1937 CHECK_CHARSET_GET_ID (charset, id);
1938 charsetp = CHARSET_FROM_ID (id);
1939
1940 dimension = CHARSET_DIMENSION (charsetp);
1941 if (NILP (code1))
1942 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1943 ? 0 : CHARSET_MIN_CODE (charsetp));
1944 else
1945 {
1946 CHECK_NATNUM (code1);
1947 if (XFASTINT (code1) >= 0x100)
1948 args_out_of_range (make_number (0xFF), code1);
1949 code = XFASTINT (code1);
1950
1951 if (dimension > 1)
1952 {
1953 code <<= 8;
1954 if (NILP (code2))
1955 code |= charsetp->code_space[(dimension - 2) * 4];
1956 else
1957 {
1958 CHECK_NATNUM (code2);
1959 if (XFASTINT (code2) >= 0x100)
1960 args_out_of_range (make_number (0xFF), code2);
1961 code |= XFASTINT (code2);
1962 }
1963
1964 if (dimension > 2)
1965 {
1966 code <<= 8;
1967 if (NILP (code3))
1968 code |= charsetp->code_space[(dimension - 3) * 4];
1969 else
1970 {
1971 CHECK_NATNUM (code3);
1972 if (XFASTINT (code3) >= 0x100)
1973 args_out_of_range (make_number (0xFF), code3);
1974 code |= XFASTINT (code3);
1975 }
1976
1977 if (dimension > 3)
1978 {
1979 code <<= 8;
1980 if (NILP (code4))
1981 code |= charsetp->code_space[0];
1982 else
1983 {
1984 CHECK_NATNUM (code4);
1985 if (XFASTINT (code4) >= 0x100)
1986 args_out_of_range (make_number (0xFF), code4);
1987 code |= XFASTINT (code4);
1988 }
1989 }
1990 }
1991 }
1992 }
1993
1994 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1995 code &= 0x7F7F7F7F;
1996 c = DECODE_CHAR (charsetp, code);
1997 if (c < 0)
1998 error ("Invalid code(s)");
1999 return make_number (c);
2000 }
2001
2002
2003 /* Return the first charset in CHARSET_LIST that contains C.
2004 CHARSET_LIST is a list of charset IDs. If it is nil, use
2005 Vcharset_ordered_list. */
2006
2007 struct charset *
2008 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2009 {
2010 int maybe_null = 0;
2011
2012 if (NILP (charset_list))
2013 charset_list = Vcharset_ordered_list;
2014 else
2015 maybe_null = 1;
2016
2017 while (CONSP (charset_list))
2018 {
2019 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2020 unsigned code = ENCODE_CHAR (charset, c);
2021
2022 if (code != CHARSET_INVALID_CODE (charset))
2023 {
2024 if (code_return)
2025 *code_return = code;
2026 return charset;
2027 }
2028 charset_list = XCDR (charset_list);
2029 if (! maybe_null
2030 && c <= MAX_UNICODE_CHAR
2031 && EQ (charset_list, Vcharset_non_preferred_head))
2032 return CHARSET_FROM_ID (charset_unicode);
2033 }
2034 return (maybe_null ? NULL
2035 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2036 : CHARSET_FROM_ID (charset_eight_bit));
2037 }
2038
2039
2040 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2041 doc:
2042 /*Return list of charset and one to four position-codes of CH.
2043 The charset is decided by the current priority order of charsets.
2044 A position-code is a byte value of each dimension of the code-point of
2045 CH in the charset. */)
2046 (Lisp_Object ch)
2047 {
2048 struct charset *charset;
2049 int c, dimension;
2050 unsigned code;
2051 Lisp_Object val;
2052
2053 CHECK_CHARACTER (ch);
2054 c = XFASTINT (ch);
2055 charset = CHAR_CHARSET (c);
2056 if (! charset)
2057 abort ();
2058 code = ENCODE_CHAR (charset, c);
2059 if (code == CHARSET_INVALID_CODE (charset))
2060 abort ();
2061 dimension = CHARSET_DIMENSION (charset);
2062 for (val = Qnil; dimension > 0; dimension--)
2063 {
2064 val = Fcons (make_number (code & 0xFF), val);
2065 code >>= 8;
2066 }
2067 return Fcons (CHARSET_NAME (charset), val);
2068 }
2069
2070
2071 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2072 doc: /* Return the charset of highest priority that contains CH.
2073 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2074 from which to find the charset. It may also be a coding system. In
2075 that case, find the charset from what supported by that coding system. */)
2076 (Lisp_Object ch, Lisp_Object restriction)
2077 {
2078 struct charset *charset;
2079
2080 CHECK_CHARACTER (ch);
2081 if (NILP (restriction))
2082 charset = CHAR_CHARSET (XINT (ch));
2083 else
2084 {
2085 Lisp_Object charset_list;
2086
2087 if (CONSP (restriction))
2088 {
2089 for (charset_list = Qnil; CONSP (restriction);
2090 restriction = XCDR (restriction))
2091 {
2092 int id;
2093
2094 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2095 charset_list = Fcons (make_number (id), charset_list);
2096 }
2097 charset_list = Fnreverse (charset_list);
2098 }
2099 else
2100 charset_list = coding_system_charset_list (restriction);
2101 charset = char_charset (XINT (ch), charset_list, NULL);
2102 if (! charset)
2103 return Qnil;
2104 }
2105 return (CHARSET_NAME (charset));
2106 }
2107
2108
2109 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2110 doc: /*
2111 Return charset of a character in the current buffer at position POS.
2112 If POS is nil, it defauls to the current point.
2113 If POS is out of range, the value is nil. */)
2114 (Lisp_Object pos)
2115 {
2116 Lisp_Object ch;
2117 struct charset *charset;
2118
2119 ch = Fchar_after (pos);
2120 if (! INTEGERP (ch))
2121 return ch;
2122 charset = CHAR_CHARSET (XINT (ch));
2123 return (CHARSET_NAME (charset));
2124 }
2125
2126
2127 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2128 doc: /*
2129 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2130
2131 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2132 by their DIMENSION, CHARS, and FINAL-CHAR,
2133 whereas Emacs distinguishes them by charset symbol.
2134 See the documentation of the function `charset-info' for the meanings of
2135 DIMENSION, CHARS, and FINAL-CHAR. */)
2136 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2137 {
2138 int id;
2139 int chars_flag;
2140
2141 check_iso_charset_parameter (dimension, chars, final_char);
2142 chars_flag = XFASTINT (chars) == 96;
2143 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2144 XFASTINT (final_char));
2145 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2146 }
2147
2148
2149 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2150 0, 0, 0,
2151 doc: /*
2152 Internal use only.
2153 Clear temporary charset mapping tables.
2154 It should be called only from temacs invoked for dumping. */)
2155 (void)
2156 {
2157 if (temp_charset_work)
2158 {
2159 free (temp_charset_work);
2160 temp_charset_work = NULL;
2161 }
2162
2163 if (CHAR_TABLE_P (Vchar_unify_table))
2164 Foptimize_char_table (Vchar_unify_table, Qnil);
2165
2166 return Qnil;
2167 }
2168
2169 DEFUN ("charset-priority-list", Fcharset_priority_list,
2170 Scharset_priority_list, 0, 1, 0,
2171 doc: /* Return the list of charsets ordered by priority.
2172 HIGHESTP non-nil means just return the highest priority one. */)
2173 (Lisp_Object highestp)
2174 {
2175 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2176
2177 if (!NILP (highestp))
2178 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2179
2180 while (!NILP (list))
2181 {
2182 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2183 list = XCDR (list);
2184 }
2185 return Fnreverse (val);
2186 }
2187
2188 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2189 1, MANY, 0,
2190 doc: /* Assign higher priority to the charsets given as arguments.
2191 usage: (set-charset-priority &rest charsets) */)
2192 (int nargs, Lisp_Object *args)
2193 {
2194 Lisp_Object new_head, old_list, arglist[2];
2195 Lisp_Object list_2022, list_emacs_mule;
2196 int i, id;
2197
2198 old_list = Fcopy_sequence (Vcharset_ordered_list);
2199 new_head = Qnil;
2200 for (i = 0; i < nargs; i++)
2201 {
2202 CHECK_CHARSET_GET_ID (args[i], id);
2203 if (! NILP (Fmemq (make_number (id), old_list)))
2204 {
2205 old_list = Fdelq (make_number (id), old_list);
2206 new_head = Fcons (make_number (id), new_head);
2207 }
2208 }
2209 arglist[0] = Fnreverse (new_head);
2210 arglist[1] = Vcharset_non_preferred_head = old_list;
2211 Vcharset_ordered_list = Fnconc (2, arglist);
2212 charset_ordered_list_tick++;
2213
2214 charset_unibyte = -1;
2215 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2216 CONSP (old_list); old_list = XCDR (old_list))
2217 {
2218 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2219 list_2022 = Fcons (XCAR (old_list), list_2022);
2220 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2221 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2222 if (charset_unibyte < 0)
2223 {
2224 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2225
2226 if (CHARSET_DIMENSION (charset) == 1
2227 && CHARSET_ASCII_COMPATIBLE_P (charset)
2228 && CHARSET_MAX_CHAR (charset) >= 0x80)
2229 charset_unibyte = CHARSET_ID (charset);
2230 }
2231 }
2232 Viso_2022_charset_list = Fnreverse (list_2022);
2233 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2234 if (charset_unibyte < 0)
2235 charset_unibyte = charset_iso_8859_1;
2236
2237 return Qnil;
2238 }
2239
2240 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2241 0, 1, 0,
2242 doc: /* Internal use only.
2243 Return charset identification number of CHARSET. */)
2244 (Lisp_Object charset)
2245 {
2246 int id;
2247
2248 CHECK_CHARSET_GET_ID (charset, id);
2249 return make_number (id);
2250 }
2251
2252 \f
2253 void
2254 init_charset (void)
2255 {
2256 Lisp_Object tempdir;
2257 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2258 if (access ((char *) SDATA (tempdir), 0) < 0)
2259 {
2260 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2261 Emacs will not function correctly without the character map files.\n\
2262 Please check your installation!\n",
2263 tempdir);
2264 /* TODO should this be a fatal error? (Bug#909) */
2265 }
2266
2267 Vcharset_map_path = Fcons (tempdir, Qnil);
2268 }
2269
2270
2271 void
2272 init_charset_once (void)
2273 {
2274 int i, j, k;
2275
2276 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2277 for (j = 0; j < ISO_MAX_CHARS; j++)
2278 for (k = 0; k < ISO_MAX_FINAL; k++)
2279 iso_charset_table[i][j][k] = -1;
2280
2281 for (i = 0; i < 256; i++)
2282 emacs_mule_charset[i] = NULL;
2283
2284 charset_jisx0201_roman = -1;
2285 charset_jisx0208_1978 = -1;
2286 charset_jisx0208 = -1;
2287 charset_ksc5601 = -1;
2288 }
2289
2290 #ifdef emacs
2291
2292 void
2293 syms_of_charset (void)
2294 {
2295 DEFSYM (Qcharsetp, "charsetp");
2296
2297 DEFSYM (Qascii, "ascii");
2298 DEFSYM (Qunicode, "unicode");
2299 DEFSYM (Qemacs, "emacs");
2300 DEFSYM (Qeight_bit, "eight-bit");
2301 DEFSYM (Qiso_8859_1, "iso-8859-1");
2302
2303 DEFSYM (Qgl, "gl");
2304 DEFSYM (Qgr, "gr");
2305
2306 staticpro (&Vcharset_ordered_list);
2307 Vcharset_ordered_list = Qnil;
2308
2309 staticpro (&Viso_2022_charset_list);
2310 Viso_2022_charset_list = Qnil;
2311
2312 staticpro (&Vemacs_mule_charset_list);
2313 Vemacs_mule_charset_list = Qnil;
2314
2315 /* Don't staticpro them here. It's done in syms_of_fns. */
2316 QCtest = intern (":test");
2317 Qeq = intern ("eq");
2318
2319 staticpro (&Vcharset_hash_table);
2320 {
2321 Lisp_Object args[2];
2322 args[0] = QCtest;
2323 args[1] = Qeq;
2324 Vcharset_hash_table = Fmake_hash_table (2, args);
2325 }
2326
2327 charset_table_size = 128;
2328 charset_table = ((struct charset *)
2329 xmalloc (sizeof (struct charset) * charset_table_size));
2330 charset_table_used = 0;
2331
2332 defsubr (&Scharsetp);
2333 defsubr (&Smap_charset_chars);
2334 defsubr (&Sdefine_charset_internal);
2335 defsubr (&Sdefine_charset_alias);
2336 defsubr (&Scharset_plist);
2337 defsubr (&Sset_charset_plist);
2338 defsubr (&Sunify_charset);
2339 defsubr (&Sget_unused_iso_final_char);
2340 defsubr (&Sdeclare_equiv_charset);
2341 defsubr (&Sfind_charset_region);
2342 defsubr (&Sfind_charset_string);
2343 defsubr (&Sdecode_char);
2344 defsubr (&Sencode_char);
2345 defsubr (&Ssplit_char);
2346 defsubr (&Smake_char);
2347 defsubr (&Schar_charset);
2348 defsubr (&Scharset_after);
2349 defsubr (&Siso_charset);
2350 defsubr (&Sclear_charset_maps);
2351 defsubr (&Scharset_priority_list);
2352 defsubr (&Sset_charset_priority);
2353 defsubr (&Scharset_id_internal);
2354
2355 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2356 doc: /* *List of directories to search for charset map files. */);
2357 Vcharset_map_path = Qnil;
2358
2359 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2360 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2361 inhibit_load_charset_map = 0;
2362
2363 DEFVAR_LISP ("charset-list", &Vcharset_list,
2364 doc: /* List of all charsets ever defined. */);
2365 Vcharset_list = Qnil;
2366
2367 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2368 doc: /* ISO639 language mnemonic symbol for the current language environment.
2369 If the current language environment is for multiple languages (e.g. "Latin-1"),
2370 the value may be a list of mnemonics. */);
2371 Vcurrent_iso639_language = Qnil;
2372
2373 charset_ascii
2374 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2375 0, 127, 'B', -1, 0, 1, 0, 0);
2376 charset_iso_8859_1
2377 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2378 0, 255, -1, -1, -1, 1, 0, 0);
2379 charset_unicode
2380 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2381 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2382 charset_emacs
2383 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2384 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2385 charset_eight_bit
2386 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2387 128, 255, -1, 0, -1, 0, 1,
2388 MAX_5_BYTE_CHAR + 1);
2389 charset_unibyte = charset_iso_8859_1;
2390 }
2391
2392 #endif /* emacs */
2393
2394 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2395 (do not change this comment) */