Merge from mainline.
[bpt/emacs.git] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
60
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
64
65 /* Table of struct charset. */
66 struct charset *charset_table;
67
68 static int charset_table_size;
69 static int charset_table_used;
70
71 Lisp_Object Qcharsetp;
72
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
86
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
92
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
95
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
98
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
101
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
105
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
110
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
113
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
116
117 struct charset *emacs_mule_charset[256];
118
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
123 Lisp_Object Vcharset_map_path;
124
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
127
128 Lisp_Object Vcurrent_iso639_language;
129
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to));
136
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
153
154
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
171
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
174
175 static struct
176 {
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
206
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
229 \f
230
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
234
235 struct charset_map_entries
236 {
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242 };
243
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
253
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
259
260 If CONTROL_FLAG is 2, setup the following tables.
261
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 */
268
269 static void
270 load_charset_map (charset, entries, n_entries, control_flag)
271 struct charset *charset;
272 struct charset_map_entries *entries;
273 int n_entries;
274 int control_flag;
275 {
276 Lisp_Object vec, table;
277 unsigned max_code = CHARSET_MAX_CODE (charset);
278 int ascii_compatible_p = charset->ascii_compatible_p;
279 int min_char, max_char, nonascii_min_char;
280 int i;
281 unsigned char *fast_map = charset->fast_map;
282
283 if (n_entries <= 0)
284 return;
285
286 if (control_flag)
287 {
288 if (! inhibit_load_charset_map)
289 {
290 if (control_flag == 1)
291 {
292 if (charset->method == CHARSET_METHOD_MAP)
293 {
294 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
295
296 vec = CHARSET_DECODER (charset)
297 = Fmake_vector (make_number (n), make_number (-1));
298 }
299 else
300 {
301 char_table_set_range (Vchar_unify_table,
302 charset->min_char, charset->max_char,
303 Qnil);
304 }
305 }
306 else
307 {
308 table = Fmake_char_table (Qnil, Qnil);
309 if (charset->method == CHARSET_METHOD_MAP)
310 CHARSET_ENCODER (charset) = table;
311 else
312 CHARSET_DEUNIFIER (charset) = table;
313 }
314 }
315 else
316 {
317 if (! temp_charset_work)
318 temp_charset_work = malloc (sizeof (*temp_charset_work));
319 if (control_flag == 1)
320 {
321 memset (temp_charset_work->table.decoder, -1,
322 sizeof (int) * 0x10000);
323 }
324 else
325 {
326 memset (temp_charset_work->table.encoder, 0,
327 sizeof (unsigned short) * 0x20000);
328 temp_charset_work->zero_index_char = -1;
329 }
330 temp_charset_work->current = charset;
331 temp_charset_work->for_encoder = (control_flag == 2);
332 control_flag += 2;
333 }
334 charset_map_loaded = 1;
335 }
336
337 min_char = max_char = entries->entry[0].c;
338 nonascii_min_char = MAX_CHAR;
339 for (i = 0; i < n_entries; i++)
340 {
341 unsigned from, to;
342 int from_index, to_index;
343 int from_c, to_c;
344 int idx = i % 0x10000;
345
346 if (i > 0 && idx == 0)
347 entries = entries->next;
348 from = entries->entry[idx].from;
349 to = entries->entry[idx].to;
350 from_c = entries->entry[idx].c;
351 from_index = CODE_POINT_TO_INDEX (charset, from);
352 if (from == to)
353 {
354 to_index = from_index;
355 to_c = from_c;
356 }
357 else
358 {
359 to_index = CODE_POINT_TO_INDEX (charset, to);
360 to_c = from_c + (to_index - from_index);
361 }
362 if (from_index < 0 || to_index < 0)
363 continue;
364
365 if (to_c > max_char)
366 max_char = to_c;
367 else if (from_c < min_char)
368 min_char = from_c;
369
370 if (control_flag == 1)
371 {
372 if (charset->method == CHARSET_METHOD_MAP)
373 for (; from_index <= to_index; from_index++, from_c++)
374 ASET (vec, from_index, make_number (from_c));
375 else
376 for (; from_index <= to_index; from_index++, from_c++)
377 CHAR_TABLE_SET (Vchar_unify_table,
378 CHARSET_CODE_OFFSET (charset) + from_index,
379 make_number (from_c));
380 }
381 else if (control_flag == 2)
382 {
383 if (charset->method == CHARSET_METHOD_MAP
384 && CHARSET_COMPACT_CODES_P (charset))
385 for (; from_index <= to_index; from_index++, from_c++)
386 {
387 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
388
389 if (NILP (CHAR_TABLE_REF (table, from_c)))
390 CHAR_TABLE_SET (table, from_c, make_number (code));
391 }
392 else
393 for (; from_index <= to_index; from_index++, from_c++)
394 {
395 if (NILP (CHAR_TABLE_REF (table, from_c)))
396 CHAR_TABLE_SET (table, from_c, make_number (from_index));
397 }
398 }
399 else if (control_flag == 3)
400 for (; from_index <= to_index; from_index++, from_c++)
401 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
402 else if (control_flag == 4)
403 for (; from_index <= to_index; from_index++, from_c++)
404 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
405 else /* control_flag == 0 */
406 {
407 if (ascii_compatible_p)
408 {
409 if (! ASCII_BYTE_P (from_c))
410 {
411 if (from_c < nonascii_min_char)
412 nonascii_min_char = from_c;
413 }
414 else if (! ASCII_BYTE_P (to_c))
415 {
416 nonascii_min_char = 0x80;
417 }
418 }
419
420 for (; from_c <= to_c; from_c++)
421 CHARSET_FAST_MAP_SET (from_c, fast_map);
422 }
423 }
424
425 if (control_flag == 0)
426 {
427 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
428 ? nonascii_min_char : min_char);
429 CHARSET_MAX_CHAR (charset) = max_char;
430 }
431 else if (control_flag == 4)
432 {
433 temp_charset_work->min_char = min_char;
434 temp_charset_work->max_char = max_char;
435 }
436 }
437
438
439 /* Read a hexadecimal number (preceded by "0x") from the file FP while
440 paying attention to comment charcter '#'. */
441
442 static INLINE unsigned
443 read_hex (fp, eof)
444 FILE *fp;
445 int *eof;
446 {
447 int c;
448 unsigned n;
449
450 while ((c = getc (fp)) != EOF)
451 {
452 if (c == '#')
453 {
454 while ((c = getc (fp)) != EOF && c != '\n');
455 }
456 else if (c == '0')
457 {
458 if ((c = getc (fp)) == EOF || c == 'x')
459 break;
460 }
461 }
462 if (c == EOF)
463 {
464 *eof = 1;
465 return 0;
466 }
467 *eof = 0;
468 n = 0;
469 if (c == 'x')
470 while ((c = getc (fp)) != EOF && isxdigit (c))
471 n = ((n << 4)
472 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
473 else
474 while ((c = getc (fp)) != EOF && isdigit (c))
475 n = (n * 10) + c - '0';
476 if (c != EOF)
477 ungetc (c, fp);
478 return n;
479 }
480
481 extern Lisp_Object Qfile_name_handler_alist;
482
483 /* Return a mapping vector for CHARSET loaded from MAPFILE.
484 Each line of MAPFILE has this form
485 0xAAAA 0xCCCC
486 where 0xAAAA is a code-point and 0xCCCC is the corresponding
487 character code, or this form
488 0xAAAA-0xBBBB 0xCCCC
489 where 0xAAAA and 0xBBBB are code-points specifying a range, and
490 0xCCCC is the first character code of the range.
491
492 The returned vector has this form:
493 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
494 where CODE1 is a code-point or a cons of code-points specifying a
495 range.
496
497 Note that this function uses `openp' to open MAPFILE but ignores
498 `file-name-handler-alist' to avoid running any Lisp code. */
499
500 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
501
502 static void
503 load_charset_map_from_file (charset, mapfile, control_flag)
504 struct charset *charset;
505 Lisp_Object mapfile;
506 int control_flag;
507 {
508 unsigned min_code = CHARSET_MIN_CODE (charset);
509 unsigned max_code = CHARSET_MAX_CODE (charset);
510 int fd;
511 FILE *fp;
512 int eof;
513 Lisp_Object suffixes;
514 struct charset_map_entries *head, *entries;
515 int n_entries, count;
516 USE_SAFE_ALLOCA;
517
518 suffixes = Fcons (build_string (".map"),
519 Fcons (build_string (".TXT"), Qnil));
520
521 count = SPECPDL_INDEX ();
522 specbind (Qfile_name_handler_alist, Qnil);
523 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
524 unbind_to (count, Qnil);
525 if (fd < 0
526 || ! (fp = fdopen (fd, "r")))
527 error ("Failure in loading charset map: %S", SDATA (mapfile));
528
529 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
530 large (larger than MAX_ALLOCA). */
531 SAFE_ALLOCA (head, struct charset_map_entries *,
532 sizeof (struct charset_map_entries));
533 entries = head;
534 bzero (entries, sizeof (struct charset_map_entries));
535
536 n_entries = 0;
537 eof = 0;
538 while (1)
539 {
540 unsigned from, to;
541 int c;
542 int idx;
543
544 from = read_hex (fp, &eof);
545 if (eof)
546 break;
547 if (getc (fp) == '-')
548 to = read_hex (fp, &eof);
549 else
550 to = from;
551 c = (int) read_hex (fp, &eof);
552
553 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
554 continue;
555
556 if (n_entries > 0 && (n_entries % 0x10000) == 0)
557 {
558 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
559 sizeof (struct charset_map_entries));
560 entries = entries->next;
561 bzero (entries, sizeof (struct charset_map_entries));
562 }
563 idx = n_entries % 0x10000;
564 entries->entry[idx].from = from;
565 entries->entry[idx].to = to;
566 entries->entry[idx].c = c;
567 n_entries++;
568 }
569 fclose (fp);
570 close (fd);
571
572 load_charset_map (charset, head, n_entries, control_flag);
573 SAFE_FREE ();
574 }
575
576 static void
577 load_charset_map_from_vector (charset, vec, control_flag)
578 struct charset *charset;
579 Lisp_Object vec;
580 int control_flag;
581 {
582 unsigned min_code = CHARSET_MIN_CODE (charset);
583 unsigned max_code = CHARSET_MAX_CODE (charset);
584 struct charset_map_entries *head, *entries;
585 int n_entries;
586 int len = ASIZE (vec);
587 int i;
588 USE_SAFE_ALLOCA;
589
590 if (len % 2 == 1)
591 {
592 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
593 return;
594 }
595
596 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
597 large (larger than MAX_ALLOCA). */
598 SAFE_ALLOCA (head, struct charset_map_entries *,
599 sizeof (struct charset_map_entries));
600 entries = head;
601 bzero (entries, sizeof (struct charset_map_entries));
602
603 n_entries = 0;
604 for (i = 0; i < len; i += 2)
605 {
606 Lisp_Object val, val2;
607 unsigned from, to;
608 int c;
609 int idx;
610
611 val = AREF (vec, i);
612 if (CONSP (val))
613 {
614 val2 = XCDR (val);
615 val = XCAR (val);
616 CHECK_NATNUM (val);
617 CHECK_NATNUM (val2);
618 from = XFASTINT (val);
619 to = XFASTINT (val2);
620 }
621 else
622 {
623 CHECK_NATNUM (val);
624 from = to = XFASTINT (val);
625 }
626 val = AREF (vec, i + 1);
627 CHECK_NATNUM (val);
628 c = XFASTINT (val);
629
630 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
631 continue;
632
633 if (n_entries > 0 && (n_entries % 0x10000) == 0)
634 {
635 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
636 sizeof (struct charset_map_entries));
637 entries = entries->next;
638 bzero (entries, sizeof (struct charset_map_entries));
639 }
640 idx = n_entries % 0x10000;
641 entries->entry[idx].from = from;
642 entries->entry[idx].to = to;
643 entries->entry[idx].c = c;
644 n_entries++;
645 }
646
647 load_charset_map (charset, head, n_entries, control_flag);
648 SAFE_FREE ();
649 }
650
651
652 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
653 map it is (see the comment of load_charset_map for the detail). */
654
655 static void
656 load_charset (charset, control_flag)
657 struct charset *charset;
658 int control_flag;
659 {
660 Lisp_Object map;
661
662 if (inhibit_load_charset_map
663 && temp_charset_work
664 && charset == temp_charset_work->current
665 && ((control_flag == 2) == temp_charset_work->for_encoder))
666 return;
667
668 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
669 map = CHARSET_MAP (charset);
670 else if (CHARSET_UNIFIED_P (charset))
671 map = CHARSET_UNIFY_MAP (charset);
672 if (STRINGP (map))
673 load_charset_map_from_file (charset, map, control_flag);
674 else
675 load_charset_map_from_vector (charset, map, control_flag);
676 }
677
678
679 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
680 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
681 (object)
682 Lisp_Object object;
683 {
684 return (CHARSETP (object) ? Qt : Qnil);
685 }
686
687
688 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
689 Lisp_Object function, Lisp_Object arg,
690 unsigned from, unsigned to));
691
692 void
693 map_charset_for_dump (c_function, function, arg, from, to)
694 void (*c_function) (Lisp_Object, Lisp_Object);
695 Lisp_Object function, arg;
696 unsigned from, to;
697 {
698 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
699 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
700 Lisp_Object range;
701 int c, stop;
702 struct gcpro gcpro1;
703
704 range = Fcons (Qnil, Qnil);
705 GCPRO1 (range);
706
707 c = temp_charset_work->min_char;
708 stop = (temp_charset_work->max_char < 0x20000
709 ? temp_charset_work->max_char : 0xFFFF);
710
711 while (1)
712 {
713 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
714
715 if (index >= from_idx && index <= to_idx)
716 {
717 if (NILP (XCAR (range)))
718 XSETCAR (range, make_number (c));
719 }
720 else if (! NILP (XCAR (range)))
721 {
722 XSETCDR (range, make_number (c - 1));
723 if (c_function)
724 (*c_function) (arg, range);
725 else
726 call2 (function, range, arg);
727 XSETCAR (range, Qnil);
728 }
729 if (c == stop)
730 {
731 if (c == temp_charset_work->max_char)
732 {
733 if (! NILP (XCAR (range)))
734 {
735 XSETCDR (range, make_number (c));
736 if (c_function)
737 (*c_function) (arg, range);
738 else
739 call2 (function, range, arg);
740 }
741 break;
742 }
743 c = 0x1FFFF;
744 stop = temp_charset_work->max_char;
745 }
746 c++;
747 }
748 UNGCPRO;
749 }
750
751 void
752 map_charset_chars (c_function, function, arg,
753 charset, from, to)
754 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
755 Lisp_Object function, arg;
756 struct charset *charset;
757 unsigned from, to;
758 {
759 Lisp_Object range;
760 int partial;
761
762 partial = (from > CHARSET_MIN_CODE (charset)
763 || to < CHARSET_MAX_CODE (charset));
764
765 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
766 {
767 int from_idx = CODE_POINT_TO_INDEX (charset, from);
768 int to_idx = CODE_POINT_TO_INDEX (charset, to);
769 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
770 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
771
772 if (CHARSET_UNIFIED_P (charset))
773 {
774 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
775 load_charset (charset, 2);
776 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
777 map_char_table_for_charset (c_function, function,
778 CHARSET_DEUNIFIER (charset), arg,
779 partial ? charset : NULL, from, to);
780 else
781 map_charset_for_dump (c_function, function, arg, from, to);
782 }
783
784 range = Fcons (make_number (from_c), make_number (to_c));
785 if (NILP (function))
786 (*c_function) (arg, range);
787 else
788 call2 (function, range, arg);
789 }
790 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
791 {
792 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
793 load_charset (charset, 2);
794 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
795 map_char_table_for_charset (c_function, function,
796 CHARSET_ENCODER (charset), arg,
797 partial ? charset : NULL, from, to);
798 else
799 map_charset_for_dump (c_function, function, arg, from, to);
800 }
801 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
802 {
803 Lisp_Object subset_info;
804 int offset;
805
806 subset_info = CHARSET_SUBSET (charset);
807 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
808 offset = XINT (AREF (subset_info, 3));
809 from -= offset;
810 if (from < XFASTINT (AREF (subset_info, 1)))
811 from = XFASTINT (AREF (subset_info, 1));
812 to -= offset;
813 if (to > XFASTINT (AREF (subset_info, 2)))
814 to = XFASTINT (AREF (subset_info, 2));
815 map_charset_chars (c_function, function, arg, charset, from, to);
816 }
817 else /* i.e. CHARSET_METHOD_SUPERSET */
818 {
819 Lisp_Object parents;
820
821 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
822 parents = XCDR (parents))
823 {
824 int offset;
825 unsigned this_from, this_to;
826
827 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
828 offset = XINT (XCDR (XCAR (parents)));
829 this_from = from > offset ? from - offset : 0;
830 this_to = to > offset ? to - offset : 0;
831 if (this_from < CHARSET_MIN_CODE (charset))
832 this_from = CHARSET_MIN_CODE (charset);
833 if (this_to > CHARSET_MAX_CODE (charset))
834 this_to = CHARSET_MAX_CODE (charset);
835 map_charset_chars (c_function, function, arg, charset,
836 this_from, this_to);
837 }
838 }
839 }
840
841 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
842 doc: /* Call FUNCTION for all characters in CHARSET.
843 FUNCTION is called with an argument RANGE and the optional 3rd
844 argument ARG.
845
846 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
847 characters contained in CHARSET.
848
849 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
850 range of code points (in CHARSET) of target characters. */)
851 (function, charset, arg, from_code, to_code)
852 Lisp_Object function, charset, arg, from_code, to_code;
853 {
854 struct charset *cs;
855 unsigned from, to;
856
857 CHECK_CHARSET_GET_CHARSET (charset, cs);
858 if (NILP (from_code))
859 from = CHARSET_MIN_CODE (cs);
860 else
861 {
862 CHECK_NATNUM (from_code);
863 from = XINT (from_code);
864 if (from < CHARSET_MIN_CODE (cs))
865 from = CHARSET_MIN_CODE (cs);
866 }
867 if (NILP (to_code))
868 to = CHARSET_MAX_CODE (cs);
869 else
870 {
871 CHECK_NATNUM (to_code);
872 to = XINT (to_code);
873 if (to > CHARSET_MAX_CODE (cs))
874 to = CHARSET_MAX_CODE (cs);
875 }
876 map_charset_chars (NULL, function, arg, cs, from, to);
877 return Qnil;
878 }
879
880
881 /* Define a charset according to the arguments. The Nth argument is
882 the Nth attribute of the charset (the last attribute `charset-id'
883 is not included). See the docstring of `define-charset' for the
884 detail. */
885
886 DEFUN ("define-charset-internal", Fdefine_charset_internal,
887 Sdefine_charset_internal, charset_arg_max, MANY, 0,
888 doc: /* For internal use only.
889 usage: (define-charset-internal ...) */)
890 (nargs, args)
891 int nargs;
892 Lisp_Object *args;
893 {
894 /* Charset attr vector. */
895 Lisp_Object attrs;
896 Lisp_Object val;
897 unsigned hash_code;
898 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
899 int i, j;
900 struct charset charset;
901 int id;
902 int dimension;
903 int new_definition_p;
904 int nchars;
905
906 if (nargs != charset_arg_max)
907 return Fsignal (Qwrong_number_of_arguments,
908 Fcons (intern ("define-charset-internal"),
909 make_number (nargs)));
910
911 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
912
913 CHECK_SYMBOL (args[charset_arg_name]);
914 ASET (attrs, charset_name, args[charset_arg_name]);
915
916 val = args[charset_arg_code_space];
917 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
918 {
919 int min_byte, max_byte;
920
921 min_byte = XINT (Faref (val, make_number (i * 2)));
922 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
923 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
924 error ("Invalid :code-space value");
925 charset.code_space[i * 4] = min_byte;
926 charset.code_space[i * 4 + 1] = max_byte;
927 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
928 nchars *= charset.code_space[i * 4 + 2];
929 charset.code_space[i * 4 + 3] = nchars;
930 if (max_byte > 0)
931 dimension = i + 1;
932 }
933
934 val = args[charset_arg_dimension];
935 if (NILP (val))
936 charset.dimension = dimension;
937 else
938 {
939 CHECK_NATNUM (val);
940 charset.dimension = XINT (val);
941 if (charset.dimension < 1 || charset.dimension > 4)
942 args_out_of_range_3 (val, make_number (1), make_number (4));
943 }
944
945 charset.code_linear_p
946 = (charset.dimension == 1
947 || (charset.code_space[2] == 256
948 && (charset.dimension == 2
949 || (charset.code_space[6] == 256
950 && (charset.dimension == 3
951 || charset.code_space[10] == 256)))));
952
953 if (! charset.code_linear_p)
954 {
955 charset.code_space_mask = (unsigned char *) xmalloc (256);
956 bzero (charset.code_space_mask, 256);
957 for (i = 0; i < 4; i++)
958 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
959 j++)
960 charset.code_space_mask[j] |= (1 << i);
961 }
962
963 charset.iso_chars_96 = charset.code_space[2] == 96;
964
965 charset.min_code = (charset.code_space[0]
966 | (charset.code_space[4] << 8)
967 | (charset.code_space[8] << 16)
968 | (charset.code_space[12] << 24));
969 charset.max_code = (charset.code_space[1]
970 | (charset.code_space[5] << 8)
971 | (charset.code_space[9] << 16)
972 | (charset.code_space[13] << 24));
973 charset.char_index_offset = 0;
974
975 val = args[charset_arg_min_code];
976 if (! NILP (val))
977 {
978 unsigned code;
979
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
983 {
984 CHECK_CONS (val);
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
988 }
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
994 charset.min_code = code;
995 }
996
997 val = args[charset_arg_max_code];
998 if (! NILP (val))
999 {
1000 unsigned code;
1001
1002 if (INTEGERP (val))
1003 code = XINT (val);
1004 else
1005 {
1006 CHECK_CONS (val);
1007 CHECK_NUMBER_CAR (val);
1008 CHECK_NUMBER_CDR (val);
1009 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1010 }
1011 if (code < charset.min_code
1012 || code > charset.max_code)
1013 args_out_of_range_3 (make_number (charset.min_code),
1014 make_number (charset.max_code), val);
1015 charset.max_code = code;
1016 }
1017
1018 charset.compact_codes_p = charset.max_code < 0x10000;
1019
1020 val = args[charset_arg_invalid_code];
1021 if (NILP (val))
1022 {
1023 if (charset.min_code > 0)
1024 charset.invalid_code = 0;
1025 else
1026 {
1027 XSETINT (val, charset.max_code + 1);
1028 if (XINT (val) == charset.max_code + 1)
1029 charset.invalid_code = charset.max_code + 1;
1030 else
1031 error ("Attribute :invalid-code must be specified");
1032 }
1033 }
1034 else
1035 {
1036 CHECK_NATNUM (val);
1037 charset.invalid_code = XFASTINT (val);
1038 }
1039
1040 val = args[charset_arg_iso_final];
1041 if (NILP (val))
1042 charset.iso_final = -1;
1043 else
1044 {
1045 CHECK_NUMBER (val);
1046 if (XINT (val) < '0' || XINT (val) > 127)
1047 error ("Invalid iso-final-char: %d", XINT (val));
1048 charset.iso_final = XINT (val);
1049 }
1050
1051 val = args[charset_arg_iso_revision];
1052 if (NILP (val))
1053 charset.iso_revision = -1;
1054 else
1055 {
1056 CHECK_NUMBER (val);
1057 if (XINT (val) > 63)
1058 args_out_of_range (make_number (63), val);
1059 charset.iso_revision = XINT (val);
1060 }
1061
1062 val = args[charset_arg_emacs_mule_id];
1063 if (NILP (val))
1064 charset.emacs_mule_id = -1;
1065 else
1066 {
1067 CHECK_NATNUM (val);
1068 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1069 error ("Invalid emacs-mule-id: %d", XINT (val));
1070 charset.emacs_mule_id = XINT (val);
1071 }
1072
1073 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1074
1075 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1076
1077 charset.unified_p = 0;
1078
1079 bzero (charset.fast_map, sizeof (charset.fast_map));
1080
1081 if (! NILP (args[charset_arg_code_offset]))
1082 {
1083 val = args[charset_arg_code_offset];
1084 CHECK_NUMBER (val);
1085
1086 charset.method = CHARSET_METHOD_OFFSET;
1087 charset.code_offset = XINT (val);
1088
1089 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1090 charset.min_char = i + charset.code_offset;
1091 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1092 charset.max_char = i + charset.code_offset;
1093 if (charset.max_char > MAX_CHAR)
1094 error ("Unsupported max char: %d", charset.max_char);
1095
1096 i = (charset.min_char >> 7) << 7;
1097 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1098 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1099 i = (i >> 12) << 12;
1100 for (; i <= charset.max_char; i += 0x1000)
1101 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1102 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1103 charset.ascii_compatible_p = 1;
1104 }
1105 else if (! NILP (args[charset_arg_map]))
1106 {
1107 val = args[charset_arg_map];
1108 ASET (attrs, charset_map, val);
1109 charset.method = CHARSET_METHOD_MAP;
1110 }
1111 else if (! NILP (args[charset_arg_subset]))
1112 {
1113 Lisp_Object parent;
1114 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1115 struct charset *parent_charset;
1116
1117 val = args[charset_arg_subset];
1118 parent = Fcar (val);
1119 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1120 parent_min_code = Fnth (make_number (1), val);
1121 CHECK_NATNUM (parent_min_code);
1122 parent_max_code = Fnth (make_number (2), val);
1123 CHECK_NATNUM (parent_max_code);
1124 parent_code_offset = Fnth (make_number (3), val);
1125 CHECK_NUMBER (parent_code_offset);
1126 val = Fmake_vector (make_number (4), Qnil);
1127 ASET (val, 0, make_number (parent_charset->id));
1128 ASET (val, 1, parent_min_code);
1129 ASET (val, 2, parent_max_code);
1130 ASET (val, 3, parent_code_offset);
1131 ASET (attrs, charset_subset, val);
1132
1133 charset.method = CHARSET_METHOD_SUBSET;
1134 /* Here, we just copy the parent's fast_map. It's not accurate,
1135 but at least it works for quickly detecting which character
1136 DOESN'T belong to this charset. */
1137 for (i = 0; i < 190; i++)
1138 charset.fast_map[i] = parent_charset->fast_map[i];
1139
1140 /* We also copy these for parents. */
1141 charset.min_char = parent_charset->min_char;
1142 charset.max_char = parent_charset->max_char;
1143 }
1144 else if (! NILP (args[charset_arg_superset]))
1145 {
1146 val = args[charset_arg_superset];
1147 charset.method = CHARSET_METHOD_SUPERSET;
1148 val = Fcopy_sequence (val);
1149 ASET (attrs, charset_superset, val);
1150
1151 charset.min_char = MAX_CHAR;
1152 charset.max_char = 0;
1153 for (; ! NILP (val); val = Fcdr (val))
1154 {
1155 Lisp_Object elt, car_part, cdr_part;
1156 int this_id, offset;
1157 struct charset *this_charset;
1158
1159 elt = Fcar (val);
1160 if (CONSP (elt))
1161 {
1162 car_part = XCAR (elt);
1163 cdr_part = XCDR (elt);
1164 CHECK_CHARSET_GET_ID (car_part, this_id);
1165 CHECK_NUMBER (cdr_part);
1166 offset = XINT (cdr_part);
1167 }
1168 else
1169 {
1170 CHECK_CHARSET_GET_ID (elt, this_id);
1171 offset = 0;
1172 }
1173 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1174
1175 this_charset = CHARSET_FROM_ID (this_id);
1176 if (charset.min_char > this_charset->min_char)
1177 charset.min_char = this_charset->min_char;
1178 if (charset.max_char < this_charset->max_char)
1179 charset.max_char = this_charset->max_char;
1180 for (i = 0; i < 190; i++)
1181 charset.fast_map[i] |= this_charset->fast_map[i];
1182 }
1183 }
1184 else
1185 error ("None of :code-offset, :map, :parents are specified");
1186
1187 val = args[charset_arg_unify_map];
1188 if (! NILP (val) && !STRINGP (val))
1189 CHECK_VECTOR (val);
1190 ASET (attrs, charset_unify_map, val);
1191
1192 CHECK_LIST (args[charset_arg_plist]);
1193 ASET (attrs, charset_plist, args[charset_arg_plist]);
1194
1195 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1196 &hash_code);
1197 if (charset.hash_index >= 0)
1198 {
1199 new_definition_p = 0;
1200 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1201 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1202 }
1203 else
1204 {
1205 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1206 hash_code);
1207 if (charset_table_used == charset_table_size)
1208 {
1209 struct charset *new_table
1210 = (struct charset *) xmalloc (sizeof (struct charset)
1211 * (charset_table_size + 16));
1212 bcopy (charset_table, new_table,
1213 sizeof (struct charset) * charset_table_size);
1214 charset_table_size += 16;
1215 charset_table = new_table;
1216 }
1217 id = charset_table_used++;
1218 new_definition_p = 1;
1219 }
1220
1221 ASET (attrs, charset_id, make_number (id));
1222 charset.id = id;
1223 charset_table[id] = charset;
1224
1225 if (charset.method == CHARSET_METHOD_MAP)
1226 {
1227 load_charset (&charset, 0);
1228 charset_table[id] = charset;
1229 }
1230
1231 if (charset.iso_final >= 0)
1232 {
1233 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1234 charset.iso_final) = id;
1235 if (new_definition_p)
1236 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1237 Fcons (make_number (id), Qnil));
1238 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1239 charset_jisx0201_roman = id;
1240 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1241 charset_jisx0208_1978 = id;
1242 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1243 charset_jisx0208 = id;
1244 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1245 charset_ksc5601 = id;
1246 }
1247
1248 if (charset.emacs_mule_id >= 0)
1249 {
1250 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1251 if (charset.emacs_mule_id < 0xA0)
1252 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1253 else
1254 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1255 if (new_definition_p)
1256 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1257 Fcons (make_number (id), Qnil));
1258 }
1259
1260 if (new_definition_p)
1261 {
1262 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1263 if (charset.supplementary_p)
1264 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1265 Fcons (make_number (id), Qnil));
1266 else
1267 {
1268 Lisp_Object tail;
1269
1270 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1271 {
1272 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1273
1274 if (cs->supplementary_p)
1275 break;
1276 }
1277 if (EQ (tail, Vcharset_ordered_list))
1278 Vcharset_ordered_list = Fcons (make_number (id),
1279 Vcharset_ordered_list);
1280 else if (NILP (tail))
1281 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1282 Fcons (make_number (id), Qnil));
1283 else
1284 {
1285 val = Fcons (XCAR (tail), XCDR (tail));
1286 XSETCDR (tail, val);
1287 XSETCAR (tail, make_number (id));
1288 }
1289 }
1290 charset_ordered_list_tick++;
1291 }
1292
1293 return Qnil;
1294 }
1295
1296
1297 /* Same as Fdefine_charset_internal but arguments are more convenient
1298 to call from C (typically in syms_of_charset). This can define a
1299 charset of `offset' method only. Return the ID of the new
1300 charset. */
1301
1302 static int
1303 define_charset_internal (name, dimension, code_space, min_code, max_code,
1304 iso_final, iso_revision, emacs_mule_id,
1305 ascii_compatible, supplementary,
1306 code_offset)
1307 Lisp_Object name;
1308 int dimension;
1309 unsigned char *code_space;
1310 unsigned min_code, max_code;
1311 int iso_final, iso_revision, emacs_mule_id;
1312 int ascii_compatible, supplementary;
1313 int code_offset;
1314 {
1315 Lisp_Object args[charset_arg_max];
1316 Lisp_Object plist[14];
1317 Lisp_Object val;
1318 int i;
1319
1320 args[charset_arg_name] = name;
1321 args[charset_arg_dimension] = make_number (dimension);
1322 val = Fmake_vector (make_number (8), make_number (0));
1323 for (i = 0; i < 8; i++)
1324 ASET (val, i, make_number (code_space[i]));
1325 args[charset_arg_code_space] = val;
1326 args[charset_arg_min_code] = make_number (min_code);
1327 args[charset_arg_max_code] = make_number (max_code);
1328 args[charset_arg_iso_final]
1329 = (iso_final < 0 ? Qnil : make_number (iso_final));
1330 args[charset_arg_iso_revision] = make_number (iso_revision);
1331 args[charset_arg_emacs_mule_id]
1332 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1333 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1334 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1335 args[charset_arg_invalid_code] = Qnil;
1336 args[charset_arg_code_offset] = make_number (code_offset);
1337 args[charset_arg_map] = Qnil;
1338 args[charset_arg_subset] = Qnil;
1339 args[charset_arg_superset] = Qnil;
1340 args[charset_arg_unify_map] = Qnil;
1341
1342 plist[0] = intern_c_string (":name");
1343 plist[1] = args[charset_arg_name];
1344 plist[2] = intern_c_string (":dimension");
1345 plist[3] = args[charset_arg_dimension];
1346 plist[4] = intern_c_string (":code-space");
1347 plist[5] = args[charset_arg_code_space];
1348 plist[6] = intern_c_string (":iso-final-char");
1349 plist[7] = args[charset_arg_iso_final];
1350 plist[8] = intern_c_string (":emacs-mule-id");
1351 plist[9] = args[charset_arg_emacs_mule_id];
1352 plist[10] = intern_c_string (":ascii-compatible-p");
1353 plist[11] = args[charset_arg_ascii_compatible_p];
1354 plist[12] = intern_c_string (":code-offset");
1355 plist[13] = args[charset_arg_code_offset];
1356
1357 args[charset_arg_plist] = Flist (14, plist);
1358 Fdefine_charset_internal (charset_arg_max, args);
1359
1360 return XINT (CHARSET_SYMBOL_ID (name));
1361 }
1362
1363
1364 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1365 Sdefine_charset_alias, 2, 2, 0,
1366 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1367 (alias, charset)
1368 Lisp_Object alias, charset;
1369 {
1370 Lisp_Object attr;
1371
1372 CHECK_CHARSET_GET_ATTR (charset, attr);
1373 Fputhash (alias, attr, Vcharset_hash_table);
1374 Vcharset_list = Fcons (alias, Vcharset_list);
1375 return Qnil;
1376 }
1377
1378
1379 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1380 doc: /* Return the property list of CHARSET. */)
1381 (charset)
1382 Lisp_Object charset;
1383 {
1384 Lisp_Object attrs;
1385
1386 CHECK_CHARSET_GET_ATTR (charset, attrs);
1387 return CHARSET_ATTR_PLIST (attrs);
1388 }
1389
1390
1391 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1392 doc: /* Set CHARSET's property list to PLIST. */)
1393 (charset, plist)
1394 Lisp_Object charset, plist;
1395 {
1396 Lisp_Object attrs;
1397
1398 CHECK_CHARSET_GET_ATTR (charset, attrs);
1399 CHARSET_ATTR_PLIST (attrs) = plist;
1400 return plist;
1401 }
1402
1403
1404 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1405 doc: /* Unify characters of CHARSET with Unicode.
1406 This means reading the relevant file and installing the table defined
1407 by CHARSET's `:unify-map' property.
1408
1409 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1410 the same meaning as the `:unify-map' attribute in the function
1411 `define-charset' (which see).
1412
1413 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1414 (charset, unify_map, deunify)
1415 Lisp_Object charset, unify_map, deunify;
1416 {
1417 int id;
1418 struct charset *cs;
1419
1420 CHECK_CHARSET_GET_ID (charset, id);
1421 cs = CHARSET_FROM_ID (id);
1422 if (NILP (deunify)
1423 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1424 : ! CHARSET_UNIFIED_P (cs))
1425 return Qnil;
1426
1427 CHARSET_UNIFIED_P (cs) = 0;
1428 if (NILP (deunify))
1429 {
1430 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1431 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1432 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1433 if (NILP (unify_map))
1434 unify_map = CHARSET_UNIFY_MAP (cs);
1435 else
1436 {
1437 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1438 signal_error ("Bad unify-map", unify_map);
1439 CHARSET_UNIFY_MAP (cs) = unify_map;
1440 }
1441 if (NILP (Vchar_unify_table))
1442 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1443 char_table_set_range (Vchar_unify_table,
1444 cs->min_char, cs->max_char, charset);
1445 CHARSET_UNIFIED_P (cs) = 1;
1446 }
1447 else if (CHAR_TABLE_P (Vchar_unify_table))
1448 {
1449 int min_code = CHARSET_MIN_CODE (cs);
1450 int max_code = CHARSET_MAX_CODE (cs);
1451 int min_char = DECODE_CHAR (cs, min_code);
1452 int max_char = DECODE_CHAR (cs, max_code);
1453
1454 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1455 }
1456
1457 return Qnil;
1458 }
1459
1460 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1461 Sget_unused_iso_final_char, 2, 2, 0,
1462 doc: /*
1463 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1464 DIMENSION is the number of bytes to represent a character: 1 or 2.
1465 CHARS is the number of characters in a dimension: 94 or 96.
1466
1467 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1468 If there's no unused final char for the specified kind of charset,
1469 return nil. */)
1470 (dimension, chars)
1471 Lisp_Object dimension, chars;
1472 {
1473 int final_char;
1474
1475 CHECK_NUMBER (dimension);
1476 CHECK_NUMBER (chars);
1477 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1478 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1479 if (XINT (chars) != 94 && XINT (chars) != 96)
1480 args_out_of_range_3 (chars, make_number (94), make_number (96));
1481 for (final_char = '0'; final_char <= '?'; final_char++)
1482 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1483 break;
1484 return (final_char <= '?' ? make_number (final_char) : Qnil);
1485 }
1486
1487 static void
1488 check_iso_charset_parameter (dimension, chars, final_char)
1489 Lisp_Object dimension, chars, final_char;
1490 {
1491 CHECK_NATNUM (dimension);
1492 CHECK_NATNUM (chars);
1493 CHECK_NATNUM (final_char);
1494
1495 if (XINT (dimension) > 3)
1496 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1497 if (XINT (chars) != 94 && XINT (chars) != 96)
1498 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1499 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1500 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1501 }
1502
1503
1504 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1505 4, 4, 0,
1506 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1507
1508 On decoding by an ISO-2022 base coding system, when a charset
1509 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1510 if CHARSET is designated instead. */)
1511 (dimension, chars, final_char, charset)
1512 Lisp_Object dimension, chars, final_char, charset;
1513 {
1514 int id;
1515 int chars_flag;
1516
1517 CHECK_CHARSET_GET_ID (charset, id);
1518 check_iso_charset_parameter (dimension, chars, final_char);
1519 chars_flag = XINT (chars) == 96;
1520 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1521 return Qnil;
1522 }
1523
1524
1525 /* Return information about charsets in the text at PTR of NBYTES
1526 bytes, which are NCHARS characters. The value is:
1527
1528 0: Each character is represented by one byte. This is always
1529 true for a unibyte string. For a multibyte string, true if
1530 it contains only ASCII characters.
1531
1532 1: No charsets other than ascii, control-1, and latin-1 are
1533 found.
1534
1535 2: Otherwise.
1536 */
1537
1538 int
1539 string_xstring_p (string)
1540 Lisp_Object string;
1541 {
1542 const unsigned char *p = SDATA (string);
1543 const unsigned char *endp = p + SBYTES (string);
1544
1545 if (SCHARS (string) == SBYTES (string))
1546 return 0;
1547
1548 while (p < endp)
1549 {
1550 int c = STRING_CHAR_ADVANCE (p);
1551
1552 if (c >= 0x100)
1553 return 2;
1554 }
1555 return 1;
1556 }
1557
1558
1559 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1560
1561 CHARSETS is a vector. If Nth element is non-nil, it means the
1562 charset whose id is N is already found.
1563
1564 It may lookup a translation table TABLE if supplied. */
1565
1566 static void
1567 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1568 const unsigned char *ptr;
1569 EMACS_INT nchars, nbytes;
1570 Lisp_Object charsets, table;
1571 int multibyte;
1572 {
1573 const unsigned char *pend = ptr + nbytes;
1574
1575 if (nchars == nbytes)
1576 {
1577 if (multibyte)
1578 ASET (charsets, charset_ascii, Qt);
1579 else
1580 while (ptr < pend)
1581 {
1582 int c = *ptr++;
1583
1584 if (!NILP (table))
1585 c = translate_char (table, c);
1586 if (ASCII_BYTE_P (c))
1587 ASET (charsets, charset_ascii, Qt);
1588 else
1589 ASET (charsets, charset_eight_bit, Qt);
1590 }
1591 }
1592 else
1593 {
1594 while (ptr < pend)
1595 {
1596 int c = STRING_CHAR_ADVANCE (ptr);
1597 struct charset *charset;
1598
1599 if (!NILP (table))
1600 c = translate_char (table, c);
1601 charset = CHAR_CHARSET (c);
1602 ASET (charsets, CHARSET_ID (charset), Qt);
1603 }
1604 }
1605 }
1606
1607 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1608 2, 3, 0,
1609 doc: /* Return a list of charsets in the region between BEG and END.
1610 BEG and END are buffer positions.
1611 Optional arg TABLE if non-nil is a translation table to look up.
1612
1613 If the current buffer is unibyte, the returned list may contain
1614 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1615 (beg, end, table)
1616 Lisp_Object beg, end, table;
1617 {
1618 Lisp_Object charsets;
1619 EMACS_INT from, from_byte, to, stop, stop_byte;
1620 int i;
1621 Lisp_Object val;
1622 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1623
1624 validate_region (&beg, &end);
1625 from = XFASTINT (beg);
1626 stop = to = XFASTINT (end);
1627
1628 if (from < GPT && GPT < to)
1629 {
1630 stop = GPT;
1631 stop_byte = GPT_BYTE;
1632 }
1633 else
1634 stop_byte = CHAR_TO_BYTE (stop);
1635
1636 from_byte = CHAR_TO_BYTE (from);
1637
1638 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1639 while (1)
1640 {
1641 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1642 stop_byte - from_byte, charsets, table,
1643 multibyte);
1644 if (stop < to)
1645 {
1646 from = stop, from_byte = stop_byte;
1647 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1648 }
1649 else
1650 break;
1651 }
1652
1653 val = Qnil;
1654 for (i = charset_table_used - 1; i >= 0; i--)
1655 if (!NILP (AREF (charsets, i)))
1656 val = Fcons (CHARSET_NAME (charset_table + i), val);
1657 return val;
1658 }
1659
1660 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1661 1, 2, 0,
1662 doc: /* Return a list of charsets in STR.
1663 Optional arg TABLE if non-nil is a translation table to look up.
1664
1665 If STR is unibyte, the returned list may contain
1666 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1667 (str, table)
1668 Lisp_Object str, table;
1669 {
1670 Lisp_Object charsets;
1671 int i;
1672 Lisp_Object val;
1673
1674 CHECK_STRING (str);
1675
1676 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1677 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1678 charsets, table,
1679 STRING_MULTIBYTE (str));
1680 val = Qnil;
1681 for (i = charset_table_used - 1; i >= 0; i--)
1682 if (!NILP (AREF (charsets, i)))
1683 val = Fcons (CHARSET_NAME (charset_table + i), val);
1684 return val;
1685 }
1686
1687 \f
1688
1689 /* Return a unified character code for C (>= 0x110000). VAL is a
1690 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1691 charset symbol. */
1692 int
1693 maybe_unify_char (c, val)
1694 int c;
1695 Lisp_Object val;
1696 {
1697 struct charset *charset;
1698
1699 if (INTEGERP (val))
1700 return XINT (val);
1701 if (NILP (val))
1702 return c;
1703
1704 CHECK_CHARSET_GET_CHARSET (val, charset);
1705 load_charset (charset, 1);
1706 if (! inhibit_load_charset_map)
1707 {
1708 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1709 if (! NILP (val))
1710 c = XINT (val);
1711 }
1712 else
1713 {
1714 int code_index = c - CHARSET_CODE_OFFSET (charset);
1715 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1716
1717 if (unified > 0)
1718 c = unified;
1719 }
1720 return c;
1721 }
1722
1723
1724 /* Return a character correponding to the code-point CODE of
1725 CHARSET. */
1726
1727 int
1728 decode_char (charset, code)
1729 struct charset *charset;
1730 unsigned code;
1731 {
1732 int c, char_index;
1733 enum charset_method method = CHARSET_METHOD (charset);
1734
1735 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1736 return -1;
1737
1738 if (method == CHARSET_METHOD_SUBSET)
1739 {
1740 Lisp_Object subset_info;
1741
1742 subset_info = CHARSET_SUBSET (charset);
1743 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1744 code -= XINT (AREF (subset_info, 3));
1745 if (code < XFASTINT (AREF (subset_info, 1))
1746 || code > XFASTINT (AREF (subset_info, 2)))
1747 c = -1;
1748 else
1749 c = DECODE_CHAR (charset, code);
1750 }
1751 else if (method == CHARSET_METHOD_SUPERSET)
1752 {
1753 Lisp_Object parents;
1754
1755 parents = CHARSET_SUPERSET (charset);
1756 c = -1;
1757 for (; CONSP (parents); parents = XCDR (parents))
1758 {
1759 int id = XINT (XCAR (XCAR (parents)));
1760 int code_offset = XINT (XCDR (XCAR (parents)));
1761 unsigned this_code = code - code_offset;
1762
1763 charset = CHARSET_FROM_ID (id);
1764 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1765 break;
1766 }
1767 }
1768 else
1769 {
1770 char_index = CODE_POINT_TO_INDEX (charset, code);
1771 if (char_index < 0)
1772 return -1;
1773
1774 if (method == CHARSET_METHOD_MAP)
1775 {
1776 Lisp_Object decoder;
1777
1778 decoder = CHARSET_DECODER (charset);
1779 if (! VECTORP (decoder))
1780 {
1781 load_charset (charset, 1);
1782 decoder = CHARSET_DECODER (charset);
1783 }
1784 if (VECTORP (decoder))
1785 c = XINT (AREF (decoder, char_index));
1786 else
1787 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1788 }
1789 else /* method == CHARSET_METHOD_OFFSET */
1790 {
1791 c = char_index + CHARSET_CODE_OFFSET (charset);
1792 if (CHARSET_UNIFIED_P (charset)
1793 && c > MAX_UNICODE_CHAR)
1794 MAYBE_UNIFY_CHAR (c);
1795 }
1796 }
1797
1798 return c;
1799 }
1800
1801 /* Variable used temporarily by the macro ENCODE_CHAR. */
1802 Lisp_Object charset_work;
1803
1804 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1805 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1806 use CHARSET's strict_max_char instead of max_char. */
1807
1808 unsigned
1809 encode_char (charset, c)
1810 struct charset *charset;
1811 int c;
1812 {
1813 unsigned code;
1814 enum charset_method method = CHARSET_METHOD (charset);
1815
1816 if (CHARSET_UNIFIED_P (charset))
1817 {
1818 Lisp_Object deunifier;
1819 int code_index = -1;
1820
1821 deunifier = CHARSET_DEUNIFIER (charset);
1822 if (! CHAR_TABLE_P (deunifier))
1823 {
1824 load_charset (charset, 2);
1825 deunifier = CHARSET_DEUNIFIER (charset);
1826 }
1827 if (CHAR_TABLE_P (deunifier))
1828 {
1829 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1830
1831 if (INTEGERP (deunified))
1832 code_index = XINT (deunified);
1833 }
1834 else
1835 {
1836 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1837 }
1838 if (code_index >= 0)
1839 c = CHARSET_CODE_OFFSET (charset) + code_index;
1840 }
1841
1842 if (method == CHARSET_METHOD_SUBSET)
1843 {
1844 Lisp_Object subset_info;
1845 struct charset *this_charset;
1846
1847 subset_info = CHARSET_SUBSET (charset);
1848 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1849 code = ENCODE_CHAR (this_charset, c);
1850 if (code == CHARSET_INVALID_CODE (this_charset)
1851 || code < XFASTINT (AREF (subset_info, 1))
1852 || code > XFASTINT (AREF (subset_info, 2)))
1853 return CHARSET_INVALID_CODE (charset);
1854 code += XINT (AREF (subset_info, 3));
1855 return code;
1856 }
1857
1858 if (method == CHARSET_METHOD_SUPERSET)
1859 {
1860 Lisp_Object parents;
1861
1862 parents = CHARSET_SUPERSET (charset);
1863 for (; CONSP (parents); parents = XCDR (parents))
1864 {
1865 int id = XINT (XCAR (XCAR (parents)));
1866 int code_offset = XINT (XCDR (XCAR (parents)));
1867 struct charset *this_charset = CHARSET_FROM_ID (id);
1868
1869 code = ENCODE_CHAR (this_charset, c);
1870 if (code != CHARSET_INVALID_CODE (this_charset))
1871 return code + code_offset;
1872 }
1873 return CHARSET_INVALID_CODE (charset);
1874 }
1875
1876 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1877 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1878 return CHARSET_INVALID_CODE (charset);
1879
1880 if (method == CHARSET_METHOD_MAP)
1881 {
1882 Lisp_Object encoder;
1883 Lisp_Object val;
1884
1885 encoder = CHARSET_ENCODER (charset);
1886 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1887 {
1888 load_charset (charset, 2);
1889 encoder = CHARSET_ENCODER (charset);
1890 }
1891 if (CHAR_TABLE_P (encoder))
1892 {
1893 val = CHAR_TABLE_REF (encoder, c);
1894 if (NILP (val))
1895 return CHARSET_INVALID_CODE (charset);
1896 code = XINT (val);
1897 if (! CHARSET_COMPACT_CODES_P (charset))
1898 code = INDEX_TO_CODE_POINT (charset, code);
1899 }
1900 else
1901 {
1902 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1903 code = INDEX_TO_CODE_POINT (charset, code);
1904 }
1905 }
1906 else /* method == CHARSET_METHOD_OFFSET */
1907 {
1908 int code_index = c - CHARSET_CODE_OFFSET (charset);
1909
1910 code = INDEX_TO_CODE_POINT (charset, code_index);
1911 }
1912
1913 return code;
1914 }
1915
1916
1917 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1918 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1919 Return nil if CODE-POINT is not valid in CHARSET.
1920
1921 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1922
1923 Optional argument RESTRICTION specifies a way to map the pair of CCS
1924 and CODE-POINT to a character. Currently not supported and just ignored. */)
1925 (charset, code_point, restriction)
1926 Lisp_Object charset, code_point, restriction;
1927 {
1928 int c, id;
1929 unsigned code;
1930 struct charset *charsetp;
1931
1932 CHECK_CHARSET_GET_ID (charset, id);
1933 if (CONSP (code_point))
1934 {
1935 CHECK_NATNUM_CAR (code_point);
1936 CHECK_NATNUM_CDR (code_point);
1937 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1938 }
1939 else
1940 {
1941 CHECK_NATNUM (code_point);
1942 code = XINT (code_point);
1943 }
1944 charsetp = CHARSET_FROM_ID (id);
1945 c = DECODE_CHAR (charsetp, code);
1946 return (c >= 0 ? make_number (c) : Qnil);
1947 }
1948
1949
1950 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1951 doc: /* Encode the character CH into a code-point of CHARSET.
1952 Return nil if CHARSET doesn't include CH.
1953
1954 Optional argument RESTRICTION specifies a way to map CH to a
1955 code-point in CCS. Currently not supported and just ignored. */)
1956 (ch, charset, restriction)
1957 Lisp_Object ch, charset, restriction;
1958 {
1959 int id;
1960 unsigned code;
1961 struct charset *charsetp;
1962
1963 CHECK_CHARSET_GET_ID (charset, id);
1964 CHECK_NATNUM (ch);
1965 charsetp = CHARSET_FROM_ID (id);
1966 code = ENCODE_CHAR (charsetp, XINT (ch));
1967 if (code == CHARSET_INVALID_CODE (charsetp))
1968 return Qnil;
1969 if (code > 0x7FFFFFF)
1970 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1971 return make_number (code);
1972 }
1973
1974
1975 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1976 doc:
1977 /* Return a character of CHARSET whose position codes are CODEn.
1978
1979 CODE1 through CODE4 are optional, but if you don't supply sufficient
1980 position codes, it is assumed that the minimum code in each dimension
1981 is specified. */)
1982 (charset, code1, code2, code3, code4)
1983 Lisp_Object charset, code1, code2, code3, code4;
1984 {
1985 int id, dimension;
1986 struct charset *charsetp;
1987 unsigned code;
1988 int c;
1989
1990 CHECK_CHARSET_GET_ID (charset, id);
1991 charsetp = CHARSET_FROM_ID (id);
1992
1993 dimension = CHARSET_DIMENSION (charsetp);
1994 if (NILP (code1))
1995 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1996 ? 0 : CHARSET_MIN_CODE (charsetp));
1997 else
1998 {
1999 CHECK_NATNUM (code1);
2000 if (XFASTINT (code1) >= 0x100)
2001 args_out_of_range (make_number (0xFF), code1);
2002 code = XFASTINT (code1);
2003
2004 if (dimension > 1)
2005 {
2006 code <<= 8;
2007 if (NILP (code2))
2008 code |= charsetp->code_space[(dimension - 2) * 4];
2009 else
2010 {
2011 CHECK_NATNUM (code2);
2012 if (XFASTINT (code2) >= 0x100)
2013 args_out_of_range (make_number (0xFF), code2);
2014 code |= XFASTINT (code2);
2015 }
2016
2017 if (dimension > 2)
2018 {
2019 code <<= 8;
2020 if (NILP (code3))
2021 code |= charsetp->code_space[(dimension - 3) * 4];
2022 else
2023 {
2024 CHECK_NATNUM (code3);
2025 if (XFASTINT (code3) >= 0x100)
2026 args_out_of_range (make_number (0xFF), code3);
2027 code |= XFASTINT (code3);
2028 }
2029
2030 if (dimension > 3)
2031 {
2032 code <<= 8;
2033 if (NILP (code4))
2034 code |= charsetp->code_space[0];
2035 else
2036 {
2037 CHECK_NATNUM (code4);
2038 if (XFASTINT (code4) >= 0x100)
2039 args_out_of_range (make_number (0xFF), code4);
2040 code |= XFASTINT (code4);
2041 }
2042 }
2043 }
2044 }
2045 }
2046
2047 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2048 code &= 0x7F7F7F7F;
2049 c = DECODE_CHAR (charsetp, code);
2050 if (c < 0)
2051 error ("Invalid code(s)");
2052 return make_number (c);
2053 }
2054
2055
2056 /* Return the first charset in CHARSET_LIST that contains C.
2057 CHARSET_LIST is a list of charset IDs. If it is nil, use
2058 Vcharset_ordered_list. */
2059
2060 struct charset *
2061 char_charset (c, charset_list, code_return)
2062 int c;
2063 Lisp_Object charset_list;
2064 unsigned *code_return;
2065 {
2066 int maybe_null = 0;
2067
2068 if (NILP (charset_list))
2069 charset_list = Vcharset_ordered_list;
2070 else
2071 maybe_null = 1;
2072
2073 while (CONSP (charset_list))
2074 {
2075 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2076 unsigned code = ENCODE_CHAR (charset, c);
2077
2078 if (code != CHARSET_INVALID_CODE (charset))
2079 {
2080 if (code_return)
2081 *code_return = code;
2082 return charset;
2083 }
2084 charset_list = XCDR (charset_list);
2085 if (c <= MAX_UNICODE_CHAR
2086 && EQ (charset_list, Vcharset_non_preferred_head))
2087 return CHARSET_FROM_ID (charset_unicode);
2088 }
2089 return (maybe_null ? NULL
2090 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2091 : CHARSET_FROM_ID (charset_eight_bit));
2092 }
2093
2094
2095 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2096 doc:
2097 /*Return list of charset and one to four position-codes of CH.
2098 The charset is decided by the current priority order of charsets.
2099 A position-code is a byte value of each dimension of the code-point of
2100 CH in the charset. */)
2101 (ch)
2102 Lisp_Object ch;
2103 {
2104 struct charset *charset;
2105 int c, dimension;
2106 unsigned code;
2107 Lisp_Object val;
2108
2109 CHECK_CHARACTER (ch);
2110 c = XFASTINT (ch);
2111 charset = CHAR_CHARSET (c);
2112 if (! charset)
2113 abort ();
2114 code = ENCODE_CHAR (charset, c);
2115 if (code == CHARSET_INVALID_CODE (charset))
2116 abort ();
2117 dimension = CHARSET_DIMENSION (charset);
2118 for (val = Qnil; dimension > 0; dimension--)
2119 {
2120 val = Fcons (make_number (code & 0xFF), val);
2121 code >>= 8;
2122 }
2123 return Fcons (CHARSET_NAME (charset), val);
2124 }
2125
2126
2127 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2128 doc: /* Return the charset of highest priority that contains CH.
2129 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2130 from which to find the charset. It may also be a coding system. In
2131 that case, find the charset from what supported by that coding system. */)
2132 (ch, restriction)
2133 Lisp_Object ch, restriction;
2134 {
2135 struct charset *charset;
2136
2137 CHECK_CHARACTER (ch);
2138 if (NILP (restriction))
2139 charset = CHAR_CHARSET (XINT (ch));
2140 else
2141 {
2142 Lisp_Object charset_list;
2143
2144 if (CONSP (restriction))
2145 {
2146 for (charset_list = Qnil; CONSP (restriction);
2147 restriction = XCDR (restriction))
2148 {
2149 int id;
2150
2151 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2152 charset_list = Fcons (make_number (id), charset_list);
2153 }
2154 charset_list = Fnreverse (charset_list);
2155 }
2156 else
2157 charset_list = coding_system_charset_list (restriction);
2158 charset = char_charset (XINT (ch), charset_list, NULL);
2159 if (! charset)
2160 return Qnil;
2161 }
2162 return (CHARSET_NAME (charset));
2163 }
2164
2165
2166 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2167 doc: /*
2168 Return charset of a character in the current buffer at position POS.
2169 If POS is nil, it defauls to the current point.
2170 If POS is out of range, the value is nil. */)
2171 (pos)
2172 Lisp_Object pos;
2173 {
2174 Lisp_Object ch;
2175 struct charset *charset;
2176
2177 ch = Fchar_after (pos);
2178 if (! INTEGERP (ch))
2179 return ch;
2180 charset = CHAR_CHARSET (XINT (ch));
2181 return (CHARSET_NAME (charset));
2182 }
2183
2184
2185 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2186 doc: /*
2187 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2188
2189 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190 by their DIMENSION, CHARS, and FINAL-CHAR,
2191 whereas Emacs distinguishes them by charset symbol.
2192 See the documentation of the function `charset-info' for the meanings of
2193 DIMENSION, CHARS, and FINAL-CHAR. */)
2194 (dimension, chars, final_char)
2195 Lisp_Object dimension, chars, final_char;
2196 {
2197 int id;
2198 int chars_flag;
2199
2200 check_iso_charset_parameter (dimension, chars, final_char);
2201 chars_flag = XFASTINT (chars) == 96;
2202 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2203 XFASTINT (final_char));
2204 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2205 }
2206
2207
2208 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2209 0, 0, 0,
2210 doc: /*
2211 Internal use only.
2212 Clear temporary charset mapping tables.
2213 It should be called only from temacs invoked for dumping. */)
2214 ()
2215 {
2216 if (temp_charset_work)
2217 {
2218 free (temp_charset_work);
2219 temp_charset_work = NULL;
2220 }
2221
2222 if (CHAR_TABLE_P (Vchar_unify_table))
2223 Foptimize_char_table (Vchar_unify_table, Qnil);
2224
2225 return Qnil;
2226 }
2227
2228 DEFUN ("charset-priority-list", Fcharset_priority_list,
2229 Scharset_priority_list, 0, 1, 0,
2230 doc: /* Return the list of charsets ordered by priority.
2231 HIGHESTP non-nil means just return the highest priority one. */)
2232 (highestp)
2233 Lisp_Object highestp;
2234 {
2235 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2236
2237 if (!NILP (highestp))
2238 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2239
2240 while (!NILP (list))
2241 {
2242 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2243 list = XCDR (list);
2244 }
2245 return Fnreverse (val);
2246 }
2247
2248 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2249 1, MANY, 0,
2250 doc: /* Assign higher priority to the charsets given as arguments.
2251 usage: (set-charset-priority &rest charsets) */)
2252 (nargs, args)
2253 int nargs;
2254 Lisp_Object *args;
2255 {
2256 Lisp_Object new_head, old_list, arglist[2];
2257 Lisp_Object list_2022, list_emacs_mule;
2258 int i, id;
2259
2260 old_list = Fcopy_sequence (Vcharset_ordered_list);
2261 new_head = Qnil;
2262 for (i = 0; i < nargs; i++)
2263 {
2264 CHECK_CHARSET_GET_ID (args[i], id);
2265 if (! NILP (Fmemq (make_number (id), old_list)))
2266 {
2267 old_list = Fdelq (make_number (id), old_list);
2268 new_head = Fcons (make_number (id), new_head);
2269 }
2270 }
2271 arglist[0] = Fnreverse (new_head);
2272 arglist[1] = Vcharset_non_preferred_head = old_list;
2273 Vcharset_ordered_list = Fnconc (2, arglist);
2274 charset_ordered_list_tick++;
2275
2276 charset_unibyte = -1;
2277 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2278 CONSP (old_list); old_list = XCDR (old_list))
2279 {
2280 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2281 list_2022 = Fcons (XCAR (old_list), list_2022);
2282 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2283 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2284 if (charset_unibyte < 0)
2285 {
2286 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2287
2288 if (CHARSET_DIMENSION (charset) == 1
2289 && CHARSET_ASCII_COMPATIBLE_P (charset)
2290 && CHARSET_MAX_CHAR (charset) >= 0x80)
2291 charset_unibyte = CHARSET_ID (charset);
2292 }
2293 }
2294 Viso_2022_charset_list = Fnreverse (list_2022);
2295 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2296 if (charset_unibyte < 0)
2297 charset_unibyte = charset_iso_8859_1;
2298
2299 return Qnil;
2300 }
2301
2302 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2303 0, 1, 0,
2304 doc: /* Internal use only.
2305 Return charset identification number of CHARSET. */)
2306 (charset)
2307 Lisp_Object charset;
2308 {
2309 int id;
2310
2311 CHECK_CHARSET_GET_ID (charset, id);
2312 return make_number (id);
2313 }
2314
2315 \f
2316 void
2317 init_charset ()
2318 {
2319 Lisp_Object tempdir;
2320 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2321 if (access ((char *) SDATA (tempdir), 0) < 0)
2322 {
2323 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2324 Emacs will not function correctly without the character map files.\n\
2325 Please check your installation!\n",
2326 tempdir);
2327 /* TODO should this be a fatal error? (Bug#909) */
2328 }
2329
2330 Vcharset_map_path = Fcons (tempdir, Qnil);
2331 }
2332
2333
2334 void
2335 init_charset_once ()
2336 {
2337 int i, j, k;
2338
2339 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2340 for (j = 0; j < ISO_MAX_CHARS; j++)
2341 for (k = 0; k < ISO_MAX_FINAL; k++)
2342 iso_charset_table[i][j][k] = -1;
2343
2344 for (i = 0; i < 256; i++)
2345 emacs_mule_charset[i] = NULL;
2346
2347 charset_jisx0201_roman = -1;
2348 charset_jisx0208_1978 = -1;
2349 charset_jisx0208 = -1;
2350 charset_ksc5601 = -1;
2351 }
2352
2353 #ifdef emacs
2354
2355 void
2356 syms_of_charset ()
2357 {
2358 DEFSYM (Qcharsetp, "charsetp");
2359
2360 DEFSYM (Qascii, "ascii");
2361 DEFSYM (Qunicode, "unicode");
2362 DEFSYM (Qemacs, "emacs");
2363 DEFSYM (Qeight_bit, "eight-bit");
2364 DEFSYM (Qiso_8859_1, "iso-8859-1");
2365
2366 DEFSYM (Qgl, "gl");
2367 DEFSYM (Qgr, "gr");
2368
2369 staticpro (&Vcharset_ordered_list);
2370 Vcharset_ordered_list = Qnil;
2371
2372 staticpro (&Viso_2022_charset_list);
2373 Viso_2022_charset_list = Qnil;
2374
2375 staticpro (&Vemacs_mule_charset_list);
2376 Vemacs_mule_charset_list = Qnil;
2377
2378 /* Don't staticpro them here. It's done in syms_of_fns. */
2379 QCtest = intern (":test");
2380 Qeq = intern ("eq");
2381
2382 staticpro (&Vcharset_hash_table);
2383 {
2384 Lisp_Object args[2];
2385 args[0] = QCtest;
2386 args[1] = Qeq;
2387 Vcharset_hash_table = Fmake_hash_table (2, args);
2388 }
2389
2390 charset_table_size = 128;
2391 charset_table = ((struct charset *)
2392 xmalloc (sizeof (struct charset) * charset_table_size));
2393 charset_table_used = 0;
2394
2395 defsubr (&Scharsetp);
2396 defsubr (&Smap_charset_chars);
2397 defsubr (&Sdefine_charset_internal);
2398 defsubr (&Sdefine_charset_alias);
2399 defsubr (&Scharset_plist);
2400 defsubr (&Sset_charset_plist);
2401 defsubr (&Sunify_charset);
2402 defsubr (&Sget_unused_iso_final_char);
2403 defsubr (&Sdeclare_equiv_charset);
2404 defsubr (&Sfind_charset_region);
2405 defsubr (&Sfind_charset_string);
2406 defsubr (&Sdecode_char);
2407 defsubr (&Sencode_char);
2408 defsubr (&Ssplit_char);
2409 defsubr (&Smake_char);
2410 defsubr (&Schar_charset);
2411 defsubr (&Scharset_after);
2412 defsubr (&Siso_charset);
2413 defsubr (&Sclear_charset_maps);
2414 defsubr (&Scharset_priority_list);
2415 defsubr (&Sset_charset_priority);
2416 defsubr (&Scharset_id_internal);
2417
2418 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2419 doc: /* *List of directories to search for charset map files. */);
2420 Vcharset_map_path = Qnil;
2421
2422 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2423 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2424 inhibit_load_charset_map = 0;
2425
2426 DEFVAR_LISP ("charset-list", &Vcharset_list,
2427 doc: /* List of all charsets ever defined. */);
2428 Vcharset_list = Qnil;
2429
2430 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2431 doc: /* ISO639 language mnemonic symbol for the current language environment.
2432 If the current language environment is for multiple languages (e.g. "Latin-1"),
2433 the value may be a list of mnemonics. */);
2434 Vcurrent_iso639_language = Qnil;
2435
2436 charset_ascii
2437 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2438 0, 127, 'B', -1, 0, 1, 0, 0);
2439 charset_iso_8859_1
2440 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2441 0, 255, -1, -1, -1, 1, 0, 0);
2442 charset_unicode
2443 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2444 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2445 charset_emacs
2446 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2447 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2448 charset_eight_bit
2449 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2450 128, 255, -1, 0, -1, 0, 1,
2451 MAX_5_BYTE_CHAR + 1);
2452 charset_unibyte = charset_iso_8859_1;
2453 }
2454
2455 #endif /* emacs */
2456
2457 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2458 (do not change this comment) */