Convert (most) functions in src to standard C.
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
60
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
64
65 /* Table of struct charset. */
66 struct charset *charset_table;
67
68 static int charset_table_size;
69 static int charset_table_used;
70
71 Lisp_Object Qcharsetp;
72
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
86
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
92
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
95
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
98
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
101
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
105
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
110
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
113
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
116
117 struct charset *emacs_mule_charset[256];
118
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
123 Lisp_Object Vcharset_map_path;
124
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
127
128 Lisp_Object Vcurrent_iso639_language;
129
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to);
136
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
153
154
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
171
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
174
175 static struct
176 {
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
206
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
229 \f
230
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
234
235 struct charset_map_entries
236 {
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242 };
243
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
253
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
259
260 If CONTROL_FLAG is 2, setup the following tables.
261
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 */
268
269 static void
270 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
271 {
272 Lisp_Object vec, table;
273 unsigned max_code = CHARSET_MAX_CODE (charset);
274 int ascii_compatible_p = charset->ascii_compatible_p;
275 int min_char, max_char, nonascii_min_char;
276 int i;
277 unsigned char *fast_map = charset->fast_map;
278
279 if (n_entries <= 0)
280 return;
281
282 if (control_flag)
283 {
284 if (! inhibit_load_charset_map)
285 {
286 if (control_flag == 1)
287 {
288 if (charset->method == CHARSET_METHOD_MAP)
289 {
290 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
291
292 vec = CHARSET_DECODER (charset)
293 = Fmake_vector (make_number (n), make_number (-1));
294 }
295 else
296 {
297 char_table_set_range (Vchar_unify_table,
298 charset->min_char, charset->max_char,
299 Qnil);
300 }
301 }
302 else
303 {
304 table = Fmake_char_table (Qnil, Qnil);
305 if (charset->method == CHARSET_METHOD_MAP)
306 CHARSET_ENCODER (charset) = table;
307 else
308 CHARSET_DEUNIFIER (charset) = table;
309 }
310 }
311 else
312 {
313 if (! temp_charset_work)
314 temp_charset_work = malloc (sizeof (*temp_charset_work));
315 if (control_flag == 1)
316 {
317 memset (temp_charset_work->table.decoder, -1,
318 sizeof (int) * 0x10000);
319 }
320 else
321 {
322 memset (temp_charset_work->table.encoder, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work->zero_index_char = -1;
325 }
326 temp_charset_work->current = charset;
327 temp_charset_work->for_encoder = (control_flag == 2);
328 control_flag += 2;
329 }
330 charset_map_loaded = 1;
331 }
332
333 min_char = max_char = entries->entry[0].c;
334 nonascii_min_char = MAX_CHAR;
335 for (i = 0; i < n_entries; i++)
336 {
337 unsigned from, to;
338 int from_index, to_index;
339 int from_c, to_c;
340 int idx = i % 0x10000;
341
342 if (i > 0 && idx == 0)
343 entries = entries->next;
344 from = entries->entry[idx].from;
345 to = entries->entry[idx].to;
346 from_c = entries->entry[idx].c;
347 from_index = CODE_POINT_TO_INDEX (charset, from);
348 if (from == to)
349 {
350 to_index = from_index;
351 to_c = from_c;
352 }
353 else
354 {
355 to_index = CODE_POINT_TO_INDEX (charset, to);
356 to_c = from_c + (to_index - from_index);
357 }
358 if (from_index < 0 || to_index < 0)
359 continue;
360
361 if (to_c > max_char)
362 max_char = to_c;
363 else if (from_c < min_char)
364 min_char = from_c;
365
366 if (control_flag == 1)
367 {
368 if (charset->method == CHARSET_METHOD_MAP)
369 for (; from_index <= to_index; from_index++, from_c++)
370 ASET (vec, from_index, make_number (from_c));
371 else
372 for (; from_index <= to_index; from_index++, from_c++)
373 CHAR_TABLE_SET (Vchar_unify_table,
374 CHARSET_CODE_OFFSET (charset) + from_index,
375 make_number (from_c));
376 }
377 else if (control_flag == 2)
378 {
379 if (charset->method == CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset))
381 for (; from_index <= to_index; from_index++, from_c++)
382 {
383 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
384
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (code));
387 }
388 else
389 for (; from_index <= to_index; from_index++, from_c++)
390 {
391 if (NILP (CHAR_TABLE_REF (table, from_c)))
392 CHAR_TABLE_SET (table, from_c, make_number (from_index));
393 }
394 }
395 else if (control_flag == 3)
396 for (; from_index <= to_index; from_index++, from_c++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
398 else if (control_flag == 4)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
401 else /* control_flag == 0 */
402 {
403 if (ascii_compatible_p)
404 {
405 if (! ASCII_BYTE_P (from_c))
406 {
407 if (from_c < nonascii_min_char)
408 nonascii_min_char = from_c;
409 }
410 else if (! ASCII_BYTE_P (to_c))
411 {
412 nonascii_min_char = 0x80;
413 }
414 }
415
416 for (; from_c <= to_c; from_c++)
417 CHARSET_FAST_MAP_SET (from_c, fast_map);
418 }
419 }
420
421 if (control_flag == 0)
422 {
423 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
424 ? nonascii_min_char : min_char);
425 CHARSET_MAX_CHAR (charset) = max_char;
426 }
427 else if (control_flag == 4)
428 {
429 temp_charset_work->min_char = min_char;
430 temp_charset_work->max_char = max_char;
431 }
432 }
433
434
435 /* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
437
438 static INLINE unsigned
439 read_hex (FILE *fp, int *eof)
440 {
441 int c;
442 unsigned n;
443
444 while ((c = getc (fp)) != EOF)
445 {
446 if (c == '#')
447 {
448 while ((c = getc (fp)) != EOF && c != '\n');
449 }
450 else if (c == '0')
451 {
452 if ((c = getc (fp)) == EOF || c == 'x')
453 break;
454 }
455 }
456 if (c == EOF)
457 {
458 *eof = 1;
459 return 0;
460 }
461 *eof = 0;
462 n = 0;
463 if (c == 'x')
464 while ((c = getc (fp)) != EOF && isxdigit (c))
465 n = ((n << 4)
466 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
467 else
468 while ((c = getc (fp)) != EOF && isdigit (c))
469 n = (n * 10) + c - '0';
470 if (c != EOF)
471 ungetc (c, fp);
472 return n;
473 }
474
475 extern Lisp_Object Qfile_name_handler_alist;
476
477 /* Return a mapping vector for CHARSET loaded from MAPFILE.
478 Each line of MAPFILE has this form
479 0xAAAA 0xCCCC
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
482 0xAAAA-0xBBBB 0xCCCC
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
485
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
488 where CODE1 is a code-point or a cons of code-points specifying a
489 range.
490
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
493
494 extern void add_to_log (char *, Lisp_Object, Lisp_Object);
495
496 static void
497 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
498 {
499 unsigned min_code = CHARSET_MIN_CODE (charset);
500 unsigned max_code = CHARSET_MAX_CODE (charset);
501 int fd;
502 FILE *fp;
503 int eof;
504 Lisp_Object suffixes;
505 struct charset_map_entries *head, *entries;
506 int n_entries, count;
507 USE_SAFE_ALLOCA;
508
509 suffixes = Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil));
511
512 count = SPECPDL_INDEX ();
513 specbind (Qfile_name_handler_alist, Qnil);
514 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
515 unbind_to (count, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 error ("Failure in loading charset map: %S", SDATA (mapfile));
519
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head, struct charset_map_entries *,
523 sizeof (struct charset_map_entries));
524 entries = head;
525 bzero (entries, sizeof (struct charset_map_entries));
526
527 n_entries = 0;
528 eof = 0;
529 while (1)
530 {
531 unsigned from, to;
532 int c;
533 int idx;
534
535 from = read_hex (fp, &eof);
536 if (eof)
537 break;
538 if (getc (fp) == '-')
539 to = read_hex (fp, &eof);
540 else
541 to = from;
542 c = (int) read_hex (fp, &eof);
543
544 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
545 continue;
546
547 if (n_entries > 0 && (n_entries % 0x10000) == 0)
548 {
549 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
550 sizeof (struct charset_map_entries));
551 entries = entries->next;
552 bzero (entries, sizeof (struct charset_map_entries));
553 }
554 idx = n_entries % 0x10000;
555 entries->entry[idx].from = from;
556 entries->entry[idx].to = to;
557 entries->entry[idx].c = c;
558 n_entries++;
559 }
560 fclose (fp);
561
562 load_charset_map (charset, head, n_entries, control_flag);
563 SAFE_FREE ();
564 }
565
566 static void
567 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
568 {
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
575 USE_SAFE_ALLOCA;
576
577 if (len % 2 == 1)
578 {
579 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
580 return;
581 }
582
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head, struct charset_map_entries *,
586 sizeof (struct charset_map_entries));
587 entries = head;
588 bzero (entries, sizeof (struct charset_map_entries));
589
590 n_entries = 0;
591 for (i = 0; i < len; i += 2)
592 {
593 Lisp_Object val, val2;
594 unsigned from, to;
595 int c;
596 int idx;
597
598 val = AREF (vec, i);
599 if (CONSP (val))
600 {
601 val2 = XCDR (val);
602 val = XCAR (val);
603 CHECK_NATNUM (val);
604 CHECK_NATNUM (val2);
605 from = XFASTINT (val);
606 to = XFASTINT (val2);
607 }
608 else
609 {
610 CHECK_NATNUM (val);
611 from = to = XFASTINT (val);
612 }
613 val = AREF (vec, i + 1);
614 CHECK_NATNUM (val);
615 c = XFASTINT (val);
616
617 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
618 continue;
619
620 if (n_entries > 0 && (n_entries % 0x10000) == 0)
621 {
622 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
623 sizeof (struct charset_map_entries));
624 entries = entries->next;
625 bzero (entries, sizeof (struct charset_map_entries));
626 }
627 idx = n_entries % 0x10000;
628 entries->entry[idx].from = from;
629 entries->entry[idx].to = to;
630 entries->entry[idx].c = c;
631 n_entries++;
632 }
633
634 load_charset_map (charset, head, n_entries, control_flag);
635 SAFE_FREE ();
636 }
637
638
639 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
641
642 static void
643 load_charset (struct charset *charset, int control_flag)
644 {
645 Lisp_Object map;
646
647 if (inhibit_load_charset_map
648 && temp_charset_work
649 && charset == temp_charset_work->current
650 && ((control_flag == 2) == temp_charset_work->for_encoder))
651 return;
652
653 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
654 map = CHARSET_MAP (charset);
655 else if (CHARSET_UNIFIED_P (charset))
656 map = CHARSET_UNIFY_MAP (charset);
657 if (STRINGP (map))
658 load_charset_map_from_file (charset, map, control_flag);
659 else
660 load_charset_map_from_vector (charset, map, control_flag);
661 }
662
663
664 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
665 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
666 (object)
667 Lisp_Object object;
668 {
669 return (CHARSETP (object) ? Qt : Qnil);
670 }
671
672
673 void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
674 Lisp_Object function, Lisp_Object arg,
675 unsigned from, unsigned to);
676
677 void
678 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
679 {
680 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
681 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
682 Lisp_Object range;
683 int c, stop;
684 struct gcpro gcpro1;
685
686 range = Fcons (Qnil, Qnil);
687 GCPRO1 (range);
688
689 c = temp_charset_work->min_char;
690 stop = (temp_charset_work->max_char < 0x20000
691 ? temp_charset_work->max_char : 0xFFFF);
692
693 while (1)
694 {
695 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
696
697 if (index >= from_idx && index <= to_idx)
698 {
699 if (NILP (XCAR (range)))
700 XSETCAR (range, make_number (c));
701 }
702 else if (! NILP (XCAR (range)))
703 {
704 XSETCDR (range, make_number (c - 1));
705 if (c_function)
706 (*c_function) (arg, range);
707 else
708 call2 (function, range, arg);
709 XSETCAR (range, Qnil);
710 }
711 if (c == stop)
712 {
713 if (c == temp_charset_work->max_char)
714 {
715 if (! NILP (XCAR (range)))
716 {
717 XSETCDR (range, make_number (c));
718 if (c_function)
719 (*c_function) (arg, range);
720 else
721 call2 (function, range, arg);
722 }
723 break;
724 }
725 c = 0x1FFFF;
726 stop = temp_charset_work->max_char;
727 }
728 c++;
729 }
730 UNGCPRO;
731 }
732
733 void
734 map_charset_chars (c_function, function, arg,
735 charset, from, to)
736 void (*c_function) (Lisp_Object, Lisp_Object);
737 Lisp_Object function, arg;
738 struct charset *charset;
739 unsigned from, to;
740 {
741 Lisp_Object range;
742 int partial;
743
744 partial = (from > CHARSET_MIN_CODE (charset)
745 || to < CHARSET_MAX_CODE (charset));
746
747 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
748 {
749 int from_idx = CODE_POINT_TO_INDEX (charset, from);
750 int to_idx = CODE_POINT_TO_INDEX (charset, to);
751 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
752 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
753
754 if (CHARSET_UNIFIED_P (charset))
755 {
756 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
757 load_charset (charset, 2);
758 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
759 map_char_table_for_charset (c_function, function,
760 CHARSET_DEUNIFIER (charset), arg,
761 partial ? charset : NULL, from, to);
762 else
763 map_charset_for_dump (c_function, function, arg, from, to);
764 }
765
766 range = Fcons (make_number (from_c), make_number (to_c));
767 if (NILP (function))
768 (*c_function) (arg, range);
769 else
770 call2 (function, range, arg);
771 }
772 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
773 {
774 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
775 load_charset (charset, 2);
776 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
777 map_char_table_for_charset (c_function, function,
778 CHARSET_ENCODER (charset), arg,
779 partial ? charset : NULL, from, to);
780 else
781 map_charset_for_dump (c_function, function, arg, from, to);
782 }
783 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
784 {
785 Lisp_Object subset_info;
786 int offset;
787
788 subset_info = CHARSET_SUBSET (charset);
789 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
790 offset = XINT (AREF (subset_info, 3));
791 from -= offset;
792 if (from < XFASTINT (AREF (subset_info, 1)))
793 from = XFASTINT (AREF (subset_info, 1));
794 to -= offset;
795 if (to > XFASTINT (AREF (subset_info, 2)))
796 to = XFASTINT (AREF (subset_info, 2));
797 map_charset_chars (c_function, function, arg, charset, from, to);
798 }
799 else /* i.e. CHARSET_METHOD_SUPERSET */
800 {
801 Lisp_Object parents;
802
803 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
804 parents = XCDR (parents))
805 {
806 int offset;
807 unsigned this_from, this_to;
808
809 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
810 offset = XINT (XCDR (XCAR (parents)));
811 this_from = from > offset ? from - offset : 0;
812 this_to = to > offset ? to - offset : 0;
813 if (this_from < CHARSET_MIN_CODE (charset))
814 this_from = CHARSET_MIN_CODE (charset);
815 if (this_to > CHARSET_MAX_CODE (charset))
816 this_to = CHARSET_MAX_CODE (charset);
817 map_charset_chars (c_function, function, arg, charset,
818 this_from, this_to);
819 }
820 }
821 }
822
823 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
824 doc: /* Call FUNCTION for all characters in CHARSET.
825 FUNCTION is called with an argument RANGE and the optional 3rd
826 argument ARG.
827
828 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
829 characters contained in CHARSET.
830
831 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
832 range of code points (in CHARSET) of target characters. */)
833 (function, charset, arg, from_code, to_code)
834 Lisp_Object function, charset, arg, from_code, to_code;
835 {
836 struct charset *cs;
837 unsigned from, to;
838
839 CHECK_CHARSET_GET_CHARSET (charset, cs);
840 if (NILP (from_code))
841 from = CHARSET_MIN_CODE (cs);
842 else
843 {
844 CHECK_NATNUM (from_code);
845 from = XINT (from_code);
846 if (from < CHARSET_MIN_CODE (cs))
847 from = CHARSET_MIN_CODE (cs);
848 }
849 if (NILP (to_code))
850 to = CHARSET_MAX_CODE (cs);
851 else
852 {
853 CHECK_NATNUM (to_code);
854 to = XINT (to_code);
855 if (to > CHARSET_MAX_CODE (cs))
856 to = CHARSET_MAX_CODE (cs);
857 }
858 map_charset_chars (NULL, function, arg, cs, from, to);
859 return Qnil;
860 }
861
862
863 /* Define a charset according to the arguments. The Nth argument is
864 the Nth attribute of the charset (the last attribute `charset-id'
865 is not included). See the docstring of `define-charset' for the
866 detail. */
867
868 DEFUN ("define-charset-internal", Fdefine_charset_internal,
869 Sdefine_charset_internal, charset_arg_max, MANY, 0,
870 doc: /* For internal use only.
871 usage: (define-charset-internal ...) */)
872 (nargs, args)
873 int nargs;
874 Lisp_Object *args;
875 {
876 /* Charset attr vector. */
877 Lisp_Object attrs;
878 Lisp_Object val;
879 unsigned hash_code;
880 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
881 int i, j;
882 struct charset charset;
883 int id;
884 int dimension;
885 int new_definition_p;
886 int nchars;
887
888 if (nargs != charset_arg_max)
889 return Fsignal (Qwrong_number_of_arguments,
890 Fcons (intern ("define-charset-internal"),
891 make_number (nargs)));
892
893 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
894
895 CHECK_SYMBOL (args[charset_arg_name]);
896 ASET (attrs, charset_name, args[charset_arg_name]);
897
898 val = args[charset_arg_code_space];
899 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
900 {
901 int min_byte, max_byte;
902
903 min_byte = XINT (Faref (val, make_number (i * 2)));
904 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
905 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
906 error ("Invalid :code-space value");
907 charset.code_space[i * 4] = min_byte;
908 charset.code_space[i * 4 + 1] = max_byte;
909 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
910 nchars *= charset.code_space[i * 4 + 2];
911 charset.code_space[i * 4 + 3] = nchars;
912 if (max_byte > 0)
913 dimension = i + 1;
914 }
915
916 val = args[charset_arg_dimension];
917 if (NILP (val))
918 charset.dimension = dimension;
919 else
920 {
921 CHECK_NATNUM (val);
922 charset.dimension = XINT (val);
923 if (charset.dimension < 1 || charset.dimension > 4)
924 args_out_of_range_3 (val, make_number (1), make_number (4));
925 }
926
927 charset.code_linear_p
928 = (charset.dimension == 1
929 || (charset.code_space[2] == 256
930 && (charset.dimension == 2
931 || (charset.code_space[6] == 256
932 && (charset.dimension == 3
933 || charset.code_space[10] == 256)))));
934
935 if (! charset.code_linear_p)
936 {
937 charset.code_space_mask = (unsigned char *) xmalloc (256);
938 bzero (charset.code_space_mask, 256);
939 for (i = 0; i < 4; i++)
940 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
941 j++)
942 charset.code_space_mask[j] |= (1 << i);
943 }
944
945 charset.iso_chars_96 = charset.code_space[2] == 96;
946
947 charset.min_code = (charset.code_space[0]
948 | (charset.code_space[4] << 8)
949 | (charset.code_space[8] << 16)
950 | (charset.code_space[12] << 24));
951 charset.max_code = (charset.code_space[1]
952 | (charset.code_space[5] << 8)
953 | (charset.code_space[9] << 16)
954 | (charset.code_space[13] << 24));
955 charset.char_index_offset = 0;
956
957 val = args[charset_arg_min_code];
958 if (! NILP (val))
959 {
960 unsigned code;
961
962 if (INTEGERP (val))
963 code = XINT (val);
964 else
965 {
966 CHECK_CONS (val);
967 CHECK_NUMBER_CAR (val);
968 CHECK_NUMBER_CDR (val);
969 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
970 }
971 if (code < charset.min_code
972 || code > charset.max_code)
973 args_out_of_range_3 (make_number (charset.min_code),
974 make_number (charset.max_code), val);
975 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
976 charset.min_code = code;
977 }
978
979 val = args[charset_arg_max_code];
980 if (! NILP (val))
981 {
982 unsigned code;
983
984 if (INTEGERP (val))
985 code = XINT (val);
986 else
987 {
988 CHECK_CONS (val);
989 CHECK_NUMBER_CAR (val);
990 CHECK_NUMBER_CDR (val);
991 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
992 }
993 if (code < charset.min_code
994 || code > charset.max_code)
995 args_out_of_range_3 (make_number (charset.min_code),
996 make_number (charset.max_code), val);
997 charset.max_code = code;
998 }
999
1000 charset.compact_codes_p = charset.max_code < 0x10000;
1001
1002 val = args[charset_arg_invalid_code];
1003 if (NILP (val))
1004 {
1005 if (charset.min_code > 0)
1006 charset.invalid_code = 0;
1007 else
1008 {
1009 XSETINT (val, charset.max_code + 1);
1010 if (XINT (val) == charset.max_code + 1)
1011 charset.invalid_code = charset.max_code + 1;
1012 else
1013 error ("Attribute :invalid-code must be specified");
1014 }
1015 }
1016 else
1017 {
1018 CHECK_NATNUM (val);
1019 charset.invalid_code = XFASTINT (val);
1020 }
1021
1022 val = args[charset_arg_iso_final];
1023 if (NILP (val))
1024 charset.iso_final = -1;
1025 else
1026 {
1027 CHECK_NUMBER (val);
1028 if (XINT (val) < '0' || XINT (val) > 127)
1029 error ("Invalid iso-final-char: %d", XINT (val));
1030 charset.iso_final = XINT (val);
1031 }
1032
1033 val = args[charset_arg_iso_revision];
1034 if (NILP (val))
1035 charset.iso_revision = -1;
1036 else
1037 {
1038 CHECK_NUMBER (val);
1039 if (XINT (val) > 63)
1040 args_out_of_range (make_number (63), val);
1041 charset.iso_revision = XINT (val);
1042 }
1043
1044 val = args[charset_arg_emacs_mule_id];
1045 if (NILP (val))
1046 charset.emacs_mule_id = -1;
1047 else
1048 {
1049 CHECK_NATNUM (val);
1050 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1051 error ("Invalid emacs-mule-id: %d", XINT (val));
1052 charset.emacs_mule_id = XINT (val);
1053 }
1054
1055 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1056
1057 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1058
1059 charset.unified_p = 0;
1060
1061 bzero (charset.fast_map, sizeof (charset.fast_map));
1062
1063 if (! NILP (args[charset_arg_code_offset]))
1064 {
1065 val = args[charset_arg_code_offset];
1066 CHECK_NUMBER (val);
1067
1068 charset.method = CHARSET_METHOD_OFFSET;
1069 charset.code_offset = XINT (val);
1070
1071 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1072 charset.min_char = i + charset.code_offset;
1073 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1074 charset.max_char = i + charset.code_offset;
1075 if (charset.max_char > MAX_CHAR)
1076 error ("Unsupported max char: %d", charset.max_char);
1077
1078 i = (charset.min_char >> 7) << 7;
1079 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1080 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1081 i = (i >> 12) << 12;
1082 for (; i <= charset.max_char; i += 0x1000)
1083 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1084 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1085 charset.ascii_compatible_p = 1;
1086 }
1087 else if (! NILP (args[charset_arg_map]))
1088 {
1089 val = args[charset_arg_map];
1090 ASET (attrs, charset_map, val);
1091 charset.method = CHARSET_METHOD_MAP;
1092 }
1093 else if (! NILP (args[charset_arg_subset]))
1094 {
1095 Lisp_Object parent;
1096 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1097 struct charset *parent_charset;
1098
1099 val = args[charset_arg_subset];
1100 parent = Fcar (val);
1101 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1102 parent_min_code = Fnth (make_number (1), val);
1103 CHECK_NATNUM (parent_min_code);
1104 parent_max_code = Fnth (make_number (2), val);
1105 CHECK_NATNUM (parent_max_code);
1106 parent_code_offset = Fnth (make_number (3), val);
1107 CHECK_NUMBER (parent_code_offset);
1108 val = Fmake_vector (make_number (4), Qnil);
1109 ASET (val, 0, make_number (parent_charset->id));
1110 ASET (val, 1, parent_min_code);
1111 ASET (val, 2, parent_max_code);
1112 ASET (val, 3, parent_code_offset);
1113 ASET (attrs, charset_subset, val);
1114
1115 charset.method = CHARSET_METHOD_SUBSET;
1116 /* Here, we just copy the parent's fast_map. It's not accurate,
1117 but at least it works for quickly detecting which character
1118 DOESN'T belong to this charset. */
1119 for (i = 0; i < 190; i++)
1120 charset.fast_map[i] = parent_charset->fast_map[i];
1121
1122 /* We also copy these for parents. */
1123 charset.min_char = parent_charset->min_char;
1124 charset.max_char = parent_charset->max_char;
1125 }
1126 else if (! NILP (args[charset_arg_superset]))
1127 {
1128 val = args[charset_arg_superset];
1129 charset.method = CHARSET_METHOD_SUPERSET;
1130 val = Fcopy_sequence (val);
1131 ASET (attrs, charset_superset, val);
1132
1133 charset.min_char = MAX_CHAR;
1134 charset.max_char = 0;
1135 for (; ! NILP (val); val = Fcdr (val))
1136 {
1137 Lisp_Object elt, car_part, cdr_part;
1138 int this_id, offset;
1139 struct charset *this_charset;
1140
1141 elt = Fcar (val);
1142 if (CONSP (elt))
1143 {
1144 car_part = XCAR (elt);
1145 cdr_part = XCDR (elt);
1146 CHECK_CHARSET_GET_ID (car_part, this_id);
1147 CHECK_NUMBER (cdr_part);
1148 offset = XINT (cdr_part);
1149 }
1150 else
1151 {
1152 CHECK_CHARSET_GET_ID (elt, this_id);
1153 offset = 0;
1154 }
1155 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1156
1157 this_charset = CHARSET_FROM_ID (this_id);
1158 if (charset.min_char > this_charset->min_char)
1159 charset.min_char = this_charset->min_char;
1160 if (charset.max_char < this_charset->max_char)
1161 charset.max_char = this_charset->max_char;
1162 for (i = 0; i < 190; i++)
1163 charset.fast_map[i] |= this_charset->fast_map[i];
1164 }
1165 }
1166 else
1167 error ("None of :code-offset, :map, :parents are specified");
1168
1169 val = args[charset_arg_unify_map];
1170 if (! NILP (val) && !STRINGP (val))
1171 CHECK_VECTOR (val);
1172 ASET (attrs, charset_unify_map, val);
1173
1174 CHECK_LIST (args[charset_arg_plist]);
1175 ASET (attrs, charset_plist, args[charset_arg_plist]);
1176
1177 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1178 &hash_code);
1179 if (charset.hash_index >= 0)
1180 {
1181 new_definition_p = 0;
1182 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1183 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1184 }
1185 else
1186 {
1187 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1188 hash_code);
1189 if (charset_table_used == charset_table_size)
1190 {
1191 struct charset *new_table
1192 = (struct charset *) xmalloc (sizeof (struct charset)
1193 * (charset_table_size + 16));
1194 bcopy (charset_table, new_table,
1195 sizeof (struct charset) * charset_table_size);
1196 charset_table_size += 16;
1197 charset_table = new_table;
1198 }
1199 id = charset_table_used++;
1200 new_definition_p = 1;
1201 }
1202
1203 ASET (attrs, charset_id, make_number (id));
1204 charset.id = id;
1205 charset_table[id] = charset;
1206
1207 if (charset.method == CHARSET_METHOD_MAP)
1208 {
1209 load_charset (&charset, 0);
1210 charset_table[id] = charset;
1211 }
1212
1213 if (charset.iso_final >= 0)
1214 {
1215 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1216 charset.iso_final) = id;
1217 if (new_definition_p)
1218 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1219 Fcons (make_number (id), Qnil));
1220 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1221 charset_jisx0201_roman = id;
1222 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1223 charset_jisx0208_1978 = id;
1224 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1225 charset_jisx0208 = id;
1226 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1227 charset_ksc5601 = id;
1228 }
1229
1230 if (charset.emacs_mule_id >= 0)
1231 {
1232 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1233 if (charset.emacs_mule_id < 0xA0)
1234 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1235 else
1236 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1237 if (new_definition_p)
1238 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1239 Fcons (make_number (id), Qnil));
1240 }
1241
1242 if (new_definition_p)
1243 {
1244 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1245 if (charset.supplementary_p)
1246 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1247 Fcons (make_number (id), Qnil));
1248 else
1249 {
1250 Lisp_Object tail;
1251
1252 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1253 {
1254 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1255
1256 if (cs->supplementary_p)
1257 break;
1258 }
1259 if (EQ (tail, Vcharset_ordered_list))
1260 Vcharset_ordered_list = Fcons (make_number (id),
1261 Vcharset_ordered_list);
1262 else if (NILP (tail))
1263 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1264 Fcons (make_number (id), Qnil));
1265 else
1266 {
1267 val = Fcons (XCAR (tail), XCDR (tail));
1268 XSETCDR (tail, val);
1269 XSETCAR (tail, make_number (id));
1270 }
1271 }
1272 charset_ordered_list_tick++;
1273 }
1274
1275 return Qnil;
1276 }
1277
1278
1279 /* Same as Fdefine_charset_internal but arguments are more convenient
1280 to call from C (typically in syms_of_charset). This can define a
1281 charset of `offset' method only. Return the ID of the new
1282 charset. */
1283
1284 static int
1285 define_charset_internal (name, dimension, code_space, min_code, max_code,
1286 iso_final, iso_revision, emacs_mule_id,
1287 ascii_compatible, supplementary,
1288 code_offset)
1289 Lisp_Object name;
1290 int dimension;
1291 unsigned char *code_space;
1292 unsigned min_code, max_code;
1293 int iso_final, iso_revision, emacs_mule_id;
1294 int ascii_compatible, supplementary;
1295 int code_offset;
1296 {
1297 Lisp_Object args[charset_arg_max];
1298 Lisp_Object plist[14];
1299 Lisp_Object val;
1300 int i;
1301
1302 args[charset_arg_name] = name;
1303 args[charset_arg_dimension] = make_number (dimension);
1304 val = Fmake_vector (make_number (8), make_number (0));
1305 for (i = 0; i < 8; i++)
1306 ASET (val, i, make_number (code_space[i]));
1307 args[charset_arg_code_space] = val;
1308 args[charset_arg_min_code] = make_number (min_code);
1309 args[charset_arg_max_code] = make_number (max_code);
1310 args[charset_arg_iso_final]
1311 = (iso_final < 0 ? Qnil : make_number (iso_final));
1312 args[charset_arg_iso_revision] = make_number (iso_revision);
1313 args[charset_arg_emacs_mule_id]
1314 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1315 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1316 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1317 args[charset_arg_invalid_code] = Qnil;
1318 args[charset_arg_code_offset] = make_number (code_offset);
1319 args[charset_arg_map] = Qnil;
1320 args[charset_arg_subset] = Qnil;
1321 args[charset_arg_superset] = Qnil;
1322 args[charset_arg_unify_map] = Qnil;
1323
1324 plist[0] = intern_c_string (":name");
1325 plist[1] = args[charset_arg_name];
1326 plist[2] = intern_c_string (":dimension");
1327 plist[3] = args[charset_arg_dimension];
1328 plist[4] = intern_c_string (":code-space");
1329 plist[5] = args[charset_arg_code_space];
1330 plist[6] = intern_c_string (":iso-final-char");
1331 plist[7] = args[charset_arg_iso_final];
1332 plist[8] = intern_c_string (":emacs-mule-id");
1333 plist[9] = args[charset_arg_emacs_mule_id];
1334 plist[10] = intern_c_string (":ascii-compatible-p");
1335 plist[11] = args[charset_arg_ascii_compatible_p];
1336 plist[12] = intern_c_string (":code-offset");
1337 plist[13] = args[charset_arg_code_offset];
1338
1339 args[charset_arg_plist] = Flist (14, plist);
1340 Fdefine_charset_internal (charset_arg_max, args);
1341
1342 return XINT (CHARSET_SYMBOL_ID (name));
1343 }
1344
1345
1346 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1347 Sdefine_charset_alias, 2, 2, 0,
1348 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1349 (alias, charset)
1350 Lisp_Object alias, charset;
1351 {
1352 Lisp_Object attr;
1353
1354 CHECK_CHARSET_GET_ATTR (charset, attr);
1355 Fputhash (alias, attr, Vcharset_hash_table);
1356 Vcharset_list = Fcons (alias, Vcharset_list);
1357 return Qnil;
1358 }
1359
1360
1361 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1362 doc: /* Return the property list of CHARSET. */)
1363 (charset)
1364 Lisp_Object charset;
1365 {
1366 Lisp_Object attrs;
1367
1368 CHECK_CHARSET_GET_ATTR (charset, attrs);
1369 return CHARSET_ATTR_PLIST (attrs);
1370 }
1371
1372
1373 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1374 doc: /* Set CHARSET's property list to PLIST. */)
1375 (charset, plist)
1376 Lisp_Object charset, plist;
1377 {
1378 Lisp_Object attrs;
1379
1380 CHECK_CHARSET_GET_ATTR (charset, attrs);
1381 CHARSET_ATTR_PLIST (attrs) = plist;
1382 return plist;
1383 }
1384
1385
1386 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1387 doc: /* Unify characters of CHARSET with Unicode.
1388 This means reading the relevant file and installing the table defined
1389 by CHARSET's `:unify-map' property.
1390
1391 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1392 the same meaning as the `:unify-map' attribute in the function
1393 `define-charset' (which see).
1394
1395 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1396 (charset, unify_map, deunify)
1397 Lisp_Object charset, unify_map, deunify;
1398 {
1399 int id;
1400 struct charset *cs;
1401
1402 CHECK_CHARSET_GET_ID (charset, id);
1403 cs = CHARSET_FROM_ID (id);
1404 if (NILP (deunify)
1405 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1406 : ! CHARSET_UNIFIED_P (cs))
1407 return Qnil;
1408
1409 CHARSET_UNIFIED_P (cs) = 0;
1410 if (NILP (deunify))
1411 {
1412 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1413 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1414 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1415 if (NILP (unify_map))
1416 unify_map = CHARSET_UNIFY_MAP (cs);
1417 else
1418 {
1419 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1420 signal_error ("Bad unify-map", unify_map);
1421 CHARSET_UNIFY_MAP (cs) = unify_map;
1422 }
1423 if (NILP (Vchar_unify_table))
1424 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1425 char_table_set_range (Vchar_unify_table,
1426 cs->min_char, cs->max_char, charset);
1427 CHARSET_UNIFIED_P (cs) = 1;
1428 }
1429 else if (CHAR_TABLE_P (Vchar_unify_table))
1430 {
1431 int min_code = CHARSET_MIN_CODE (cs);
1432 int max_code = CHARSET_MAX_CODE (cs);
1433 int min_char = DECODE_CHAR (cs, min_code);
1434 int max_char = DECODE_CHAR (cs, max_code);
1435
1436 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1437 }
1438
1439 return Qnil;
1440 }
1441
1442 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1443 Sget_unused_iso_final_char, 2, 2, 0,
1444 doc: /*
1445 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1446 DIMENSION is the number of bytes to represent a character: 1 or 2.
1447 CHARS is the number of characters in a dimension: 94 or 96.
1448
1449 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1450 If there's no unused final char for the specified kind of charset,
1451 return nil. */)
1452 (dimension, chars)
1453 Lisp_Object dimension, chars;
1454 {
1455 int final_char;
1456
1457 CHECK_NUMBER (dimension);
1458 CHECK_NUMBER (chars);
1459 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1460 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1461 if (XINT (chars) != 94 && XINT (chars) != 96)
1462 args_out_of_range_3 (chars, make_number (94), make_number (96));
1463 for (final_char = '0'; final_char <= '?'; final_char++)
1464 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1465 break;
1466 return (final_char <= '?' ? make_number (final_char) : Qnil);
1467 }
1468
1469 static void
1470 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1471 {
1472 CHECK_NATNUM (dimension);
1473 CHECK_NATNUM (chars);
1474 CHECK_NATNUM (final_char);
1475
1476 if (XINT (dimension) > 3)
1477 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1478 if (XINT (chars) != 94 && XINT (chars) != 96)
1479 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1480 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1481 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1482 }
1483
1484
1485 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1486 4, 4, 0,
1487 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1488
1489 On decoding by an ISO-2022 base coding system, when a charset
1490 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1491 if CHARSET is designated instead. */)
1492 (dimension, chars, final_char, charset)
1493 Lisp_Object dimension, chars, final_char, charset;
1494 {
1495 int id;
1496 int chars_flag;
1497
1498 CHECK_CHARSET_GET_ID (charset, id);
1499 check_iso_charset_parameter (dimension, chars, final_char);
1500 chars_flag = XINT (chars) == 96;
1501 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1502 return Qnil;
1503 }
1504
1505
1506 /* Return information about charsets in the text at PTR of NBYTES
1507 bytes, which are NCHARS characters. The value is:
1508
1509 0: Each character is represented by one byte. This is always
1510 true for a unibyte string. For a multibyte string, true if
1511 it contains only ASCII characters.
1512
1513 1: No charsets other than ascii, control-1, and latin-1 are
1514 found.
1515
1516 2: Otherwise.
1517 */
1518
1519 int
1520 string_xstring_p (Lisp_Object string)
1521 {
1522 const unsigned char *p = SDATA (string);
1523 const unsigned char *endp = p + SBYTES (string);
1524
1525 if (SCHARS (string) == SBYTES (string))
1526 return 0;
1527
1528 while (p < endp)
1529 {
1530 int c = STRING_CHAR_ADVANCE (p);
1531
1532 if (c >= 0x100)
1533 return 2;
1534 }
1535 return 1;
1536 }
1537
1538
1539 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1540
1541 CHARSETS is a vector. If Nth element is non-nil, it means the
1542 charset whose id is N is already found.
1543
1544 It may lookup a translation table TABLE if supplied. */
1545
1546 static void
1547 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1548 {
1549 const unsigned char *pend = ptr + nbytes;
1550
1551 if (nchars == nbytes)
1552 {
1553 if (multibyte)
1554 ASET (charsets, charset_ascii, Qt);
1555 else
1556 while (ptr < pend)
1557 {
1558 int c = *ptr++;
1559
1560 if (!NILP (table))
1561 c = translate_char (table, c);
1562 if (ASCII_BYTE_P (c))
1563 ASET (charsets, charset_ascii, Qt);
1564 else
1565 ASET (charsets, charset_eight_bit, Qt);
1566 }
1567 }
1568 else
1569 {
1570 while (ptr < pend)
1571 {
1572 int c = STRING_CHAR_ADVANCE (ptr);
1573 struct charset *charset;
1574
1575 if (!NILP (table))
1576 c = translate_char (table, c);
1577 charset = CHAR_CHARSET (c);
1578 ASET (charsets, CHARSET_ID (charset), Qt);
1579 }
1580 }
1581 }
1582
1583 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1584 2, 3, 0,
1585 doc: /* Return a list of charsets in the region between BEG and END.
1586 BEG and END are buffer positions.
1587 Optional arg TABLE if non-nil is a translation table to look up.
1588
1589 If the current buffer is unibyte, the returned list may contain
1590 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1591 (beg, end, table)
1592 Lisp_Object beg, end, table;
1593 {
1594 Lisp_Object charsets;
1595 EMACS_INT from, from_byte, to, stop, stop_byte;
1596 int i;
1597 Lisp_Object val;
1598 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1599
1600 validate_region (&beg, &end);
1601 from = XFASTINT (beg);
1602 stop = to = XFASTINT (end);
1603
1604 if (from < GPT && GPT < to)
1605 {
1606 stop = GPT;
1607 stop_byte = GPT_BYTE;
1608 }
1609 else
1610 stop_byte = CHAR_TO_BYTE (stop);
1611
1612 from_byte = CHAR_TO_BYTE (from);
1613
1614 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1615 while (1)
1616 {
1617 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1618 stop_byte - from_byte, charsets, table,
1619 multibyte);
1620 if (stop < to)
1621 {
1622 from = stop, from_byte = stop_byte;
1623 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1624 }
1625 else
1626 break;
1627 }
1628
1629 val = Qnil;
1630 for (i = charset_table_used - 1; i >= 0; i--)
1631 if (!NILP (AREF (charsets, i)))
1632 val = Fcons (CHARSET_NAME (charset_table + i), val);
1633 return val;
1634 }
1635
1636 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1637 1, 2, 0,
1638 doc: /* Return a list of charsets in STR.
1639 Optional arg TABLE if non-nil is a translation table to look up.
1640
1641 If STR is unibyte, the returned list may contain
1642 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1643 (str, table)
1644 Lisp_Object str, table;
1645 {
1646 Lisp_Object charsets;
1647 int i;
1648 Lisp_Object val;
1649
1650 CHECK_STRING (str);
1651
1652 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1653 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1654 charsets, table,
1655 STRING_MULTIBYTE (str));
1656 val = Qnil;
1657 for (i = charset_table_used - 1; i >= 0; i--)
1658 if (!NILP (AREF (charsets, i)))
1659 val = Fcons (CHARSET_NAME (charset_table + i), val);
1660 return val;
1661 }
1662
1663 \f
1664
1665 /* Return a unified character code for C (>= 0x110000). VAL is a
1666 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1667 charset symbol. */
1668 int
1669 maybe_unify_char (int c, Lisp_Object val)
1670 {
1671 struct charset *charset;
1672
1673 if (INTEGERP (val))
1674 return XINT (val);
1675 if (NILP (val))
1676 return c;
1677
1678 CHECK_CHARSET_GET_CHARSET (val, charset);
1679 load_charset (charset, 1);
1680 if (! inhibit_load_charset_map)
1681 {
1682 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1683 if (! NILP (val))
1684 c = XINT (val);
1685 }
1686 else
1687 {
1688 int code_index = c - CHARSET_CODE_OFFSET (charset);
1689 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1690
1691 if (unified > 0)
1692 c = unified;
1693 }
1694 return c;
1695 }
1696
1697
1698 /* Return a character correponding to the code-point CODE of
1699 CHARSET. */
1700
1701 int
1702 decode_char (struct charset *charset, unsigned int code)
1703 {
1704 int c, char_index;
1705 enum charset_method method = CHARSET_METHOD (charset);
1706
1707 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1708 return -1;
1709
1710 if (method == CHARSET_METHOD_SUBSET)
1711 {
1712 Lisp_Object subset_info;
1713
1714 subset_info = CHARSET_SUBSET (charset);
1715 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1716 code -= XINT (AREF (subset_info, 3));
1717 if (code < XFASTINT (AREF (subset_info, 1))
1718 || code > XFASTINT (AREF (subset_info, 2)))
1719 c = -1;
1720 else
1721 c = DECODE_CHAR (charset, code);
1722 }
1723 else if (method == CHARSET_METHOD_SUPERSET)
1724 {
1725 Lisp_Object parents;
1726
1727 parents = CHARSET_SUPERSET (charset);
1728 c = -1;
1729 for (; CONSP (parents); parents = XCDR (parents))
1730 {
1731 int id = XINT (XCAR (XCAR (parents)));
1732 int code_offset = XINT (XCDR (XCAR (parents)));
1733 unsigned this_code = code - code_offset;
1734
1735 charset = CHARSET_FROM_ID (id);
1736 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1737 break;
1738 }
1739 }
1740 else
1741 {
1742 char_index = CODE_POINT_TO_INDEX (charset, code);
1743 if (char_index < 0)
1744 return -1;
1745
1746 if (method == CHARSET_METHOD_MAP)
1747 {
1748 Lisp_Object decoder;
1749
1750 decoder = CHARSET_DECODER (charset);
1751 if (! VECTORP (decoder))
1752 {
1753 load_charset (charset, 1);
1754 decoder = CHARSET_DECODER (charset);
1755 }
1756 if (VECTORP (decoder))
1757 c = XINT (AREF (decoder, char_index));
1758 else
1759 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1760 }
1761 else /* method == CHARSET_METHOD_OFFSET */
1762 {
1763 c = char_index + CHARSET_CODE_OFFSET (charset);
1764 if (CHARSET_UNIFIED_P (charset)
1765 && c > MAX_UNICODE_CHAR)
1766 MAYBE_UNIFY_CHAR (c);
1767 }
1768 }
1769
1770 return c;
1771 }
1772
1773 /* Variable used temporarily by the macro ENCODE_CHAR. */
1774 Lisp_Object charset_work;
1775
1776 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1777 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1778 use CHARSET's strict_max_char instead of max_char. */
1779
1780 unsigned
1781 encode_char (struct charset *charset, int c)
1782 {
1783 unsigned code;
1784 enum charset_method method = CHARSET_METHOD (charset);
1785
1786 if (CHARSET_UNIFIED_P (charset))
1787 {
1788 Lisp_Object deunifier;
1789 int code_index = -1;
1790
1791 deunifier = CHARSET_DEUNIFIER (charset);
1792 if (! CHAR_TABLE_P (deunifier))
1793 {
1794 load_charset (charset, 2);
1795 deunifier = CHARSET_DEUNIFIER (charset);
1796 }
1797 if (CHAR_TABLE_P (deunifier))
1798 {
1799 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1800
1801 if (INTEGERP (deunified))
1802 code_index = XINT (deunified);
1803 }
1804 else
1805 {
1806 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1807 }
1808 if (code_index >= 0)
1809 c = CHARSET_CODE_OFFSET (charset) + code_index;
1810 }
1811
1812 if (method == CHARSET_METHOD_SUBSET)
1813 {
1814 Lisp_Object subset_info;
1815 struct charset *this_charset;
1816
1817 subset_info = CHARSET_SUBSET (charset);
1818 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1819 code = ENCODE_CHAR (this_charset, c);
1820 if (code == CHARSET_INVALID_CODE (this_charset)
1821 || code < XFASTINT (AREF (subset_info, 1))
1822 || code > XFASTINT (AREF (subset_info, 2)))
1823 return CHARSET_INVALID_CODE (charset);
1824 code += XINT (AREF (subset_info, 3));
1825 return code;
1826 }
1827
1828 if (method == CHARSET_METHOD_SUPERSET)
1829 {
1830 Lisp_Object parents;
1831
1832 parents = CHARSET_SUPERSET (charset);
1833 for (; CONSP (parents); parents = XCDR (parents))
1834 {
1835 int id = XINT (XCAR (XCAR (parents)));
1836 int code_offset = XINT (XCDR (XCAR (parents)));
1837 struct charset *this_charset = CHARSET_FROM_ID (id);
1838
1839 code = ENCODE_CHAR (this_charset, c);
1840 if (code != CHARSET_INVALID_CODE (this_charset))
1841 return code + code_offset;
1842 }
1843 return CHARSET_INVALID_CODE (charset);
1844 }
1845
1846 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1847 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1848 return CHARSET_INVALID_CODE (charset);
1849
1850 if (method == CHARSET_METHOD_MAP)
1851 {
1852 Lisp_Object encoder;
1853 Lisp_Object val;
1854
1855 encoder = CHARSET_ENCODER (charset);
1856 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1857 {
1858 load_charset (charset, 2);
1859 encoder = CHARSET_ENCODER (charset);
1860 }
1861 if (CHAR_TABLE_P (encoder))
1862 {
1863 val = CHAR_TABLE_REF (encoder, c);
1864 if (NILP (val))
1865 return CHARSET_INVALID_CODE (charset);
1866 code = XINT (val);
1867 if (! CHARSET_COMPACT_CODES_P (charset))
1868 code = INDEX_TO_CODE_POINT (charset, code);
1869 }
1870 else
1871 {
1872 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1873 code = INDEX_TO_CODE_POINT (charset, code);
1874 }
1875 }
1876 else /* method == CHARSET_METHOD_OFFSET */
1877 {
1878 int code_index = c - CHARSET_CODE_OFFSET (charset);
1879
1880 code = INDEX_TO_CODE_POINT (charset, code_index);
1881 }
1882
1883 return code;
1884 }
1885
1886
1887 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1888 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1889 Return nil if CODE-POINT is not valid in CHARSET.
1890
1891 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1892
1893 Optional argument RESTRICTION specifies a way to map the pair of CCS
1894 and CODE-POINT to a character. Currently not supported and just ignored. */)
1895 (charset, code_point, restriction)
1896 Lisp_Object charset, code_point, restriction;
1897 {
1898 int c, id;
1899 unsigned code;
1900 struct charset *charsetp;
1901
1902 CHECK_CHARSET_GET_ID (charset, id);
1903 if (CONSP (code_point))
1904 {
1905 CHECK_NATNUM_CAR (code_point);
1906 CHECK_NATNUM_CDR (code_point);
1907 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1908 }
1909 else
1910 {
1911 CHECK_NATNUM (code_point);
1912 code = XINT (code_point);
1913 }
1914 charsetp = CHARSET_FROM_ID (id);
1915 c = DECODE_CHAR (charsetp, code);
1916 return (c >= 0 ? make_number (c) : Qnil);
1917 }
1918
1919
1920 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1921 doc: /* Encode the character CH into a code-point of CHARSET.
1922 Return nil if CHARSET doesn't include CH.
1923
1924 Optional argument RESTRICTION specifies a way to map CH to a
1925 code-point in CCS. Currently not supported and just ignored. */)
1926 (ch, charset, restriction)
1927 Lisp_Object ch, charset, restriction;
1928 {
1929 int id;
1930 unsigned code;
1931 struct charset *charsetp;
1932
1933 CHECK_CHARSET_GET_ID (charset, id);
1934 CHECK_NATNUM (ch);
1935 charsetp = CHARSET_FROM_ID (id);
1936 code = ENCODE_CHAR (charsetp, XINT (ch));
1937 if (code == CHARSET_INVALID_CODE (charsetp))
1938 return Qnil;
1939 if (code > 0x7FFFFFF)
1940 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1941 return make_number (code);
1942 }
1943
1944
1945 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1946 doc:
1947 /* Return a character of CHARSET whose position codes are CODEn.
1948
1949 CODE1 through CODE4 are optional, but if you don't supply sufficient
1950 position codes, it is assumed that the minimum code in each dimension
1951 is specified. */)
1952 (charset, code1, code2, code3, code4)
1953 Lisp_Object charset, code1, code2, code3, code4;
1954 {
1955 int id, dimension;
1956 struct charset *charsetp;
1957 unsigned code;
1958 int c;
1959
1960 CHECK_CHARSET_GET_ID (charset, id);
1961 charsetp = CHARSET_FROM_ID (id);
1962
1963 dimension = CHARSET_DIMENSION (charsetp);
1964 if (NILP (code1))
1965 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1966 ? 0 : CHARSET_MIN_CODE (charsetp));
1967 else
1968 {
1969 CHECK_NATNUM (code1);
1970 if (XFASTINT (code1) >= 0x100)
1971 args_out_of_range (make_number (0xFF), code1);
1972 code = XFASTINT (code1);
1973
1974 if (dimension > 1)
1975 {
1976 code <<= 8;
1977 if (NILP (code2))
1978 code |= charsetp->code_space[(dimension - 2) * 4];
1979 else
1980 {
1981 CHECK_NATNUM (code2);
1982 if (XFASTINT (code2) >= 0x100)
1983 args_out_of_range (make_number (0xFF), code2);
1984 code |= XFASTINT (code2);
1985 }
1986
1987 if (dimension > 2)
1988 {
1989 code <<= 8;
1990 if (NILP (code3))
1991 code |= charsetp->code_space[(dimension - 3) * 4];
1992 else
1993 {
1994 CHECK_NATNUM (code3);
1995 if (XFASTINT (code3) >= 0x100)
1996 args_out_of_range (make_number (0xFF), code3);
1997 code |= XFASTINT (code3);
1998 }
1999
2000 if (dimension > 3)
2001 {
2002 code <<= 8;
2003 if (NILP (code4))
2004 code |= charsetp->code_space[0];
2005 else
2006 {
2007 CHECK_NATNUM (code4);
2008 if (XFASTINT (code4) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code4);
2010 code |= XFASTINT (code4);
2011 }
2012 }
2013 }
2014 }
2015 }
2016
2017 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2018 code &= 0x7F7F7F7F;
2019 c = DECODE_CHAR (charsetp, code);
2020 if (c < 0)
2021 error ("Invalid code(s)");
2022 return make_number (c);
2023 }
2024
2025
2026 /* Return the first charset in CHARSET_LIST that contains C.
2027 CHARSET_LIST is a list of charset IDs. If it is nil, use
2028 Vcharset_ordered_list. */
2029
2030 struct charset *
2031 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2032 {
2033 int maybe_null = 0;
2034
2035 if (NILP (charset_list))
2036 charset_list = Vcharset_ordered_list;
2037 else
2038 maybe_null = 1;
2039
2040 while (CONSP (charset_list))
2041 {
2042 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2043 unsigned code = ENCODE_CHAR (charset, c);
2044
2045 if (code != CHARSET_INVALID_CODE (charset))
2046 {
2047 if (code_return)
2048 *code_return = code;
2049 return charset;
2050 }
2051 charset_list = XCDR (charset_list);
2052 if (! maybe_null
2053 && c <= MAX_UNICODE_CHAR
2054 && EQ (charset_list, Vcharset_non_preferred_head))
2055 return CHARSET_FROM_ID (charset_unicode);
2056 }
2057 return (maybe_null ? NULL
2058 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2059 : CHARSET_FROM_ID (charset_eight_bit));
2060 }
2061
2062
2063 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2064 doc:
2065 /*Return list of charset and one to four position-codes of CH.
2066 The charset is decided by the current priority order of charsets.
2067 A position-code is a byte value of each dimension of the code-point of
2068 CH in the charset. */)
2069 (ch)
2070 Lisp_Object ch;
2071 {
2072 struct charset *charset;
2073 int c, dimension;
2074 unsigned code;
2075 Lisp_Object val;
2076
2077 CHECK_CHARACTER (ch);
2078 c = XFASTINT (ch);
2079 charset = CHAR_CHARSET (c);
2080 if (! charset)
2081 abort ();
2082 code = ENCODE_CHAR (charset, c);
2083 if (code == CHARSET_INVALID_CODE (charset))
2084 abort ();
2085 dimension = CHARSET_DIMENSION (charset);
2086 for (val = Qnil; dimension > 0; dimension--)
2087 {
2088 val = Fcons (make_number (code & 0xFF), val);
2089 code >>= 8;
2090 }
2091 return Fcons (CHARSET_NAME (charset), val);
2092 }
2093
2094
2095 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2096 doc: /* Return the charset of highest priority that contains CH.
2097 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2098 from which to find the charset. It may also be a coding system. In
2099 that case, find the charset from what supported by that coding system. */)
2100 (ch, restriction)
2101 Lisp_Object ch, restriction;
2102 {
2103 struct charset *charset;
2104
2105 CHECK_CHARACTER (ch);
2106 if (NILP (restriction))
2107 charset = CHAR_CHARSET (XINT (ch));
2108 else
2109 {
2110 Lisp_Object charset_list;
2111
2112 if (CONSP (restriction))
2113 {
2114 for (charset_list = Qnil; CONSP (restriction);
2115 restriction = XCDR (restriction))
2116 {
2117 int id;
2118
2119 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2120 charset_list = Fcons (make_number (id), charset_list);
2121 }
2122 charset_list = Fnreverse (charset_list);
2123 }
2124 else
2125 charset_list = coding_system_charset_list (restriction);
2126 charset = char_charset (XINT (ch), charset_list, NULL);
2127 if (! charset)
2128 return Qnil;
2129 }
2130 return (CHARSET_NAME (charset));
2131 }
2132
2133
2134 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2135 doc: /*
2136 Return charset of a character in the current buffer at position POS.
2137 If POS is nil, it defauls to the current point.
2138 If POS is out of range, the value is nil. */)
2139 (pos)
2140 Lisp_Object pos;
2141 {
2142 Lisp_Object ch;
2143 struct charset *charset;
2144
2145 ch = Fchar_after (pos);
2146 if (! INTEGERP (ch))
2147 return ch;
2148 charset = CHAR_CHARSET (XINT (ch));
2149 return (CHARSET_NAME (charset));
2150 }
2151
2152
2153 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2154 doc: /*
2155 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2156
2157 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2158 by their DIMENSION, CHARS, and FINAL-CHAR,
2159 whereas Emacs distinguishes them by charset symbol.
2160 See the documentation of the function `charset-info' for the meanings of
2161 DIMENSION, CHARS, and FINAL-CHAR. */)
2162 (dimension, chars, final_char)
2163 Lisp_Object dimension, chars, final_char;
2164 {
2165 int id;
2166 int chars_flag;
2167
2168 check_iso_charset_parameter (dimension, chars, final_char);
2169 chars_flag = XFASTINT (chars) == 96;
2170 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2171 XFASTINT (final_char));
2172 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2173 }
2174
2175
2176 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2177 0, 0, 0,
2178 doc: /*
2179 Internal use only.
2180 Clear temporary charset mapping tables.
2181 It should be called only from temacs invoked for dumping. */)
2182 ()
2183 {
2184 if (temp_charset_work)
2185 {
2186 free (temp_charset_work);
2187 temp_charset_work = NULL;
2188 }
2189
2190 if (CHAR_TABLE_P (Vchar_unify_table))
2191 Foptimize_char_table (Vchar_unify_table, Qnil);
2192
2193 return Qnil;
2194 }
2195
2196 DEFUN ("charset-priority-list", Fcharset_priority_list,
2197 Scharset_priority_list, 0, 1, 0,
2198 doc: /* Return the list of charsets ordered by priority.
2199 HIGHESTP non-nil means just return the highest priority one. */)
2200 (highestp)
2201 Lisp_Object highestp;
2202 {
2203 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2204
2205 if (!NILP (highestp))
2206 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2207
2208 while (!NILP (list))
2209 {
2210 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2211 list = XCDR (list);
2212 }
2213 return Fnreverse (val);
2214 }
2215
2216 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2217 1, MANY, 0,
2218 doc: /* Assign higher priority to the charsets given as arguments.
2219 usage: (set-charset-priority &rest charsets) */)
2220 (nargs, args)
2221 int nargs;
2222 Lisp_Object *args;
2223 {
2224 Lisp_Object new_head, old_list, arglist[2];
2225 Lisp_Object list_2022, list_emacs_mule;
2226 int i, id;
2227
2228 old_list = Fcopy_sequence (Vcharset_ordered_list);
2229 new_head = Qnil;
2230 for (i = 0; i < nargs; i++)
2231 {
2232 CHECK_CHARSET_GET_ID (args[i], id);
2233 if (! NILP (Fmemq (make_number (id), old_list)))
2234 {
2235 old_list = Fdelq (make_number (id), old_list);
2236 new_head = Fcons (make_number (id), new_head);
2237 }
2238 }
2239 arglist[0] = Fnreverse (new_head);
2240 arglist[1] = Vcharset_non_preferred_head = old_list;
2241 Vcharset_ordered_list = Fnconc (2, arglist);
2242 charset_ordered_list_tick++;
2243
2244 charset_unibyte = -1;
2245 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2246 CONSP (old_list); old_list = XCDR (old_list))
2247 {
2248 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2249 list_2022 = Fcons (XCAR (old_list), list_2022);
2250 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2251 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2252 if (charset_unibyte < 0)
2253 {
2254 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2255
2256 if (CHARSET_DIMENSION (charset) == 1
2257 && CHARSET_ASCII_COMPATIBLE_P (charset)
2258 && CHARSET_MAX_CHAR (charset) >= 0x80)
2259 charset_unibyte = CHARSET_ID (charset);
2260 }
2261 }
2262 Viso_2022_charset_list = Fnreverse (list_2022);
2263 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2264 if (charset_unibyte < 0)
2265 charset_unibyte = charset_iso_8859_1;
2266
2267 return Qnil;
2268 }
2269
2270 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2271 0, 1, 0,
2272 doc: /* Internal use only.
2273 Return charset identification number of CHARSET. */)
2274 (charset)
2275 Lisp_Object charset;
2276 {
2277 int id;
2278
2279 CHECK_CHARSET_GET_ID (charset, id);
2280 return make_number (id);
2281 }
2282
2283 \f
2284 void
2285 init_charset (void)
2286 {
2287 Lisp_Object tempdir;
2288 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2289 if (access ((char *) SDATA (tempdir), 0) < 0)
2290 {
2291 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2292 Emacs will not function correctly without the character map files.\n\
2293 Please check your installation!\n",
2294 tempdir);
2295 /* TODO should this be a fatal error? (Bug#909) */
2296 }
2297
2298 Vcharset_map_path = Fcons (tempdir, Qnil);
2299 }
2300
2301
2302 void
2303 init_charset_once (void)
2304 {
2305 int i, j, k;
2306
2307 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2308 for (j = 0; j < ISO_MAX_CHARS; j++)
2309 for (k = 0; k < ISO_MAX_FINAL; k++)
2310 iso_charset_table[i][j][k] = -1;
2311
2312 for (i = 0; i < 256; i++)
2313 emacs_mule_charset[i] = NULL;
2314
2315 charset_jisx0201_roman = -1;
2316 charset_jisx0208_1978 = -1;
2317 charset_jisx0208 = -1;
2318 charset_ksc5601 = -1;
2319 }
2320
2321 #ifdef emacs
2322
2323 void
2324 syms_of_charset (void)
2325 {
2326 DEFSYM (Qcharsetp, "charsetp");
2327
2328 DEFSYM (Qascii, "ascii");
2329 DEFSYM (Qunicode, "unicode");
2330 DEFSYM (Qemacs, "emacs");
2331 DEFSYM (Qeight_bit, "eight-bit");
2332 DEFSYM (Qiso_8859_1, "iso-8859-1");
2333
2334 DEFSYM (Qgl, "gl");
2335 DEFSYM (Qgr, "gr");
2336
2337 staticpro (&Vcharset_ordered_list);
2338 Vcharset_ordered_list = Qnil;
2339
2340 staticpro (&Viso_2022_charset_list);
2341 Viso_2022_charset_list = Qnil;
2342
2343 staticpro (&Vemacs_mule_charset_list);
2344 Vemacs_mule_charset_list = Qnil;
2345
2346 /* Don't staticpro them here. It's done in syms_of_fns. */
2347 QCtest = intern (":test");
2348 Qeq = intern ("eq");
2349
2350 staticpro (&Vcharset_hash_table);
2351 {
2352 Lisp_Object args[2];
2353 args[0] = QCtest;
2354 args[1] = Qeq;
2355 Vcharset_hash_table = Fmake_hash_table (2, args);
2356 }
2357
2358 charset_table_size = 128;
2359 charset_table = ((struct charset *)
2360 xmalloc (sizeof (struct charset) * charset_table_size));
2361 charset_table_used = 0;
2362
2363 defsubr (&Scharsetp);
2364 defsubr (&Smap_charset_chars);
2365 defsubr (&Sdefine_charset_internal);
2366 defsubr (&Sdefine_charset_alias);
2367 defsubr (&Scharset_plist);
2368 defsubr (&Sset_charset_plist);
2369 defsubr (&Sunify_charset);
2370 defsubr (&Sget_unused_iso_final_char);
2371 defsubr (&Sdeclare_equiv_charset);
2372 defsubr (&Sfind_charset_region);
2373 defsubr (&Sfind_charset_string);
2374 defsubr (&Sdecode_char);
2375 defsubr (&Sencode_char);
2376 defsubr (&Ssplit_char);
2377 defsubr (&Smake_char);
2378 defsubr (&Schar_charset);
2379 defsubr (&Scharset_after);
2380 defsubr (&Siso_charset);
2381 defsubr (&Sclear_charset_maps);
2382 defsubr (&Scharset_priority_list);
2383 defsubr (&Sset_charset_priority);
2384 defsubr (&Scharset_id_internal);
2385
2386 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2387 doc: /* *List of directories to search for charset map files. */);
2388 Vcharset_map_path = Qnil;
2389
2390 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2391 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2392 inhibit_load_charset_map = 0;
2393
2394 DEFVAR_LISP ("charset-list", &Vcharset_list,
2395 doc: /* List of all charsets ever defined. */);
2396 Vcharset_list = Qnil;
2397
2398 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2399 doc: /* ISO639 language mnemonic symbol for the current language environment.
2400 If the current language environment is for multiple languages (e.g. "Latin-1"),
2401 the value may be a list of mnemonics. */);
2402 Vcurrent_iso639_language = Qnil;
2403
2404 charset_ascii
2405 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2406 0, 127, 'B', -1, 0, 1, 0, 0);
2407 charset_iso_8859_1
2408 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2409 0, 255, -1, -1, -1, 1, 0, 0);
2410 charset_unicode
2411 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2412 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2413 charset_emacs
2414 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2415 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2416 charset_eight_bit
2417 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2418 128, 255, -1, 0, -1, 0, 1,
2419 MAX_5_BYTE_CHAR + 1);
2420 charset_unibyte = charset_iso_8859_1;
2421 }
2422
2423 #endif /* emacs */
2424
2425 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2426 (do not change this comment) */