(SET_RAW_SYNTAX_ENTRY): Don't call make_number.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
3263d5a2 51/*** GENERAL NOTE on CODED CHARACTER SET (CHARSET) ***
4ed46869 52
3263d5a2
KH
53 A coded character set ("charset" hereafter) is a meaningful
54 collection (i.e. language, culture, functionality, etc) of
55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
56 code, a charset is represented by symbol. In C code, a charset is
57 represented by its ID number or by a pointer the struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
62 charset_table as struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
3263d5a2
KH
66/* List of all charsets. This variable is used only from Emacs
67 Lisp. */
4ed46869
KH
68Lisp_Object Vcharset_list;
69
3263d5a2
KH
70/* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72Lisp_Object Vcharset_hash_table;
73
74/* Table of struct charset. */
75struct charset *charset_table;
76
77static int charset_table_size;
78int charset_table_used;
79
80Lisp_Object Qcharsetp;
81
82/* Special charset symbols. */
83Lisp_Object Qascii;
84Lisp_Object Qeight_bit_control;
85Lisp_Object Qeight_bit_graphic;
86Lisp_Object Qiso_8859_1;
87Lisp_Object Qunicode;
b0e3cf2b 88
3263d5a2
KH
89/* The corresponding charsets. */
90int charset_ascii;
91int charset_8_bit_control;
92int charset_8_bit_graphic;
93int charset_iso_8859_1;
94int charset_unicode;
c1a08b4c 95
3263d5a2
KH
96/* Value of charset attribute `charset-iso-plane'. */
97Lisp_Object Qgl, Qgr;
c1a08b4c 98
3263d5a2
KH
99/* The primary charset. It is a charset of unibyte characters. */
100int charset_primary;
101
102/* List of charsets ordered by the priority. */
103Lisp_Object Vcharset_ordered_list;
104
105/* List of iso-2022 charsets. */
106Lisp_Object Viso_2022_charset_list;
107
108/* List of emacs-mule charsets. */
109Lisp_Object Vemacs_mule_charset_list;
110
111struct charset *emacs_mule_charset[256];
4ed46869
KH
112
113/* Mapping table from ISO2022's charset (specified by DIMENSION,
114 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
115int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
116
117Lisp_Object Vcharset_map_directory;
118
119Lisp_Object Vchar_unified_charset_table;
120
121#define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (code) - (charset)->min_code \
124 : ((((code) >> 24) <= (charset)->code_space[13]) \
125 && ((((code) >> 16) & 0xFF) <= (charset)->code_space[9]) \
126 && ((((code) >> 8) & 0xFF) <= (charset)->code_space[5]) \
127 && (((code) & 0xFF) <= (charset)->code_space[1])) \
128 ? (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0])) \
135 : -1)
136
137
138/* Convert the character index IDX to code-point CODE for CHARSET.
139 It is assumed that IDX is in a valid range. */
140
141#define INDEX_TO_CODE_POINT(charset, idx) \
142 ((charset)->code_linear_p \
143 ? (idx) + (charset)->min_code \
144 : (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
147 << 8) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
150 << 16) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
152 << 24)))
4ed46869 153
3263d5a2 154\f
4ed46869 155
3263d5a2
KH
156/* Set to 1 when a charset map is loaded to warn that a buffer text
157 and a string data may be relocated. */
158int charset_map_loaded;
35e623fb 159
3263d5a2
KH
160/* Parse the mapping vector MAP which has this form:
161 [CODE0 CHAR0 CODE1 CHAR1 ... ]
4cf9710d 162
3263d5a2 163 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 164
3263d5a2
KH
165 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
166 CHARSET->decoder, and CHARSET->encoder.
93bcb785 167
3263d5a2
KH
168 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
169 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
170 setup it too. */
4ed46869 171
3263d5a2
KH
172static void
173parse_charset_map (charset, map, control_flag)
174 struct charset *charset;
175 Lisp_Object map;
176 int control_flag;
4ed46869 177{
3263d5a2
KH
178 Lisp_Object vec, table;
179 unsigned min_code = CHARSET_MIN_CODE (charset);
180 unsigned max_code = CHARSET_MAX_CODE (charset);
181 int ascii_compatible_p = charset->ascii_compatible_p;
182 int min_char, max_char, nonascii_min_char;
183 int size;
184 int i;
185 int first;
186 unsigned char *fast_map = charset->fast_map;
99529c2c 187
3263d5a2 188 if (control_flag)
8ac5a9cc 189 {
3263d5a2
KH
190 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
191 unsigned invalid_code = CHARSET_INVALID_CODE (charset);
6662e69b 192
3263d5a2
KH
193 table = Fmake_char_table (Qnil, make_number (invalid_code));
194 if (control_flag == 1)
195 vec = Fmake_vector (make_number (n), make_number (-1));
196 else if (! CHAR_TABLE_P (Vchar_unify_table))
197 Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1));
6662e69b 198
3263d5a2 199 charset_map_loaded = 1;
2e344af3 200 }
3263d5a2
KH
201
202 size = ASIZE (map);
203 nonascii_min_char = MAX_CHAR;
204 CHARSET_COMPACT_CODES_P (charset) = 1;
205 for (first = 1, i = 0; i < size; i += 2)
2e344af3 206 {
3263d5a2
KH
207 Lisp_Object val;
208 unsigned code, temp;
209 int c, char_index;
210
211 val = AREF (map, i);
212 CHECK_NATNUM (val);
213 code = XFASTINT (val);
214 val = AREF (map, i + 1);
215 CHECK_NATNUM (val);
216 c = XFASTINT (val);
217
218 if (code < min_code || code > max_code)
219 continue;
220 char_index = CODE_POINT_TO_INDEX (charset, code);
221 if (char_index < 0
222 || c > MAX_CHAR)
223 continue;
224
225 if (control_flag < 2)
226 {
227 if (first)
228 {
229 min_char = max_char = c;
230 first = 0;
231 }
232 else if (c > max_char)
233 max_char = c;
234 else if (c < min_char)
235 min_char = c;
236 if (ascii_compatible_p && ! ASCII_BYTE_P (c)
237 && c < nonascii_min_char)
238 nonascii_min_char = c;
239
240 CHARSET_FAST_MAP_SET (c, fast_map);
241 }
242
243 if (control_flag)
2e344af3 244 {
3263d5a2
KH
245 if (control_flag == 1)
246 {
247 if (char_index >= ASIZE (vec))
248 abort ();
249 ASET (vec, char_index, make_number (c));
250 if (code > 0x7FFFFFF)
251 {
252 CHAR_TABLE_SET (table, c,
253 Fcons (make_number (code >> 16),
254 make_number (code & 0xFFFF)));
255 CHARSET_COMPACT_CODES_P (charset) = 0;
256 }
257 else
258 CHAR_TABLE_SET (table, c, make_number (code));
259 }
260 else
261 {
262 int c1 = DECODE_CHAR (charset, code);
263 if (c1 >= 0)
264 {
265 CHAR_TABLE_SET (table, c, make_number (c1));
266 CHAR_TABLE_SET (Vchar_unify_table, c1, c);
267 if (CHAR_TABLE_P (Vchar_unified_charset_table))
268 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
269 CHARSET_NAME (charset));
270 }
271 }
2e344af3 272 }
8ac5a9cc 273 }
3263d5a2
KH
274
275 if (control_flag < 2)
4ed46869 276 {
3263d5a2
KH
277 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
278 ? nonascii_min_char : min_char);
279 CHARSET_MAX_CHAR (charset) = max_char;
280 if (control_flag)
4ed46869 281 {
3263d5a2
KH
282 CHARSET_DECODER (charset) = vec;
283 CHARSET_ENCODER (charset) = table;
4ed46869
KH
284 }
285 }
2e344af3 286 else
3263d5a2 287 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
288}
289
12bcae05 290
3263d5a2
KH
291/* Read a hexadecimal number (preceded by "0x") from the file FP while
292 paying attention to comment charcter '#'. */
12bcae05 293
3263d5a2
KH
294static INLINE unsigned
295read_hex (fp, eof)
296 FILE *fp;
297 int *eof;
12bcae05 298{
3263d5a2
KH
299 int c;
300 unsigned n;
12bcae05 301
3263d5a2
KH
302 while ((c = getc (fp)) != EOF)
303 {
304 if (c == '#' || c == ' ')
305 {
306 while ((c = getc (fp)) != EOF && c != '\n');
307 }
308 else if (c == '0')
309 {
310 if ((c = getc (fp)) == EOF || c == 'x')
311 break;
312 }
313 }
314 if (c == EOF)
315 {
316 *eof = 1;
317 return 0;
318 }
319 *eof = 0;
320 n = 0;
321 if (c == 'x')
322 while ((c = getc (fp)) != EOF && isxdigit (c))
323 n = ((n << 4)
324 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
325 else
326 while ((c = getc (fp)) != EOF && isdigit (c))
327 n = (n * 10) + c - '0';
328 return n;
329}
12bcae05 330
537efd8d 331
3263d5a2
KH
332/* Return a mapping vector for CHARSET loaded from MAPFILE.
333 Each line of MAPFILE has this form:
334 0xAAAA 0xBBBB
335 where 0xAAAA is a code-point and 0xBBBB is the corresponding
336 character code.
337 The returned vector has this form:
338 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
339*/
4ed46869 340
3263d5a2
KH
341static Lisp_Object
342load_charset_map (charset, mapfile)
343 struct charset *charset;
344 Lisp_Object mapfile;
4ed46869 345{
3263d5a2
KH
346 int fd;
347 FILE *fp;
348 int num;
349 unsigned *numbers_table[256];
350 int numbers_table_used;
351 unsigned *numbers;
352 int eof;
353 Lisp_Object suffixes;
354 Lisp_Object vec;
355 int i;
4ed46869 356
3263d5a2
KH
357 suffixes = Fcons (build_string (".map"),
358 Fcons (build_string (".TXT"), Qnil));
4ed46869 359
3263d5a2
KH
360 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
361 NULL, 0);
362 if (fd < 0
363 || ! (fp = fdopen (fd, "r")))
364 {
365 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
366 return Qnil;
367 }
4ed46869 368
3263d5a2
KH
369 numbers_table_used = 0;
370 num = 0;
371 eof = 0;
372 while (1)
373 {
374 unsigned n = read_hex (fp, &eof);
4ed46869 375
3263d5a2
KH
376 if (eof)
377 break;
378 if ((num % 0x10000) == 0)
379 {
380 if (numbers_table_used == 256)
381 break;
382 numbers = (unsigned *) alloca (sizeof (unsigned) * 0x10000);
383 numbers_table[numbers_table_used++] = numbers;
384 }
385 *numbers++ = n;
386 num++;
387 }
388 fclose (fp);
389 close (fd);
4ed46869 390
3263d5a2
KH
391 vec = Fmake_vector (make_number (num), Qnil);
392 for (i = 0; i < num; i++, numbers++)
393 {
394 if ((i % 0x10000) == 0)
395 numbers = numbers_table[i / 0x10000];
396 ASET (vec, i, make_number (*numbers));
397 }
4ed46869 398
3263d5a2
KH
399 charset_map_loaded = 1;
400
401 return vec;
ac4137cc
KH
402}
403
3263d5a2
KH
404static void
405load_charset (charset)
406 struct charset *charset;
ac4137cc 407{
3263d5a2
KH
408 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
409 {
410 Lisp_Object map;
ac4137cc 411
3263d5a2
KH
412 map = CHARSET_MAP (charset);
413 if (STRINGP (map))
414 map = load_charset_map (charset, map);
415 parse_charset_map (charset, map, 1);
416 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
417 }
4ed46869
KH
418}
419
3263d5a2
KH
420
421DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
422 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
423 (object)
424 Lisp_Object object;
23d2a7f1 425{
3263d5a2 426 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
427}
428
35e623fb 429
3263d5a2
KH
430void
431map_charset_chars (c_function, function, charset_symbol, arg)
432 void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object);
433 Lisp_Object function, charset_symbol, arg;
35e623fb 434{
3263d5a2
KH
435 int id;
436 struct charset *charset;
437 Lisp_Object range;
d2665018 438
3263d5a2
KH
439 CHECK_CHARSET_GET_ID (charset_symbol, id);
440 charset = CHARSET_FROM_ID (id);
441
442 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
443 load_charset (charset);
444
445 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
446 {
447 range = Fcons (make_number (CHARSET_MIN_CHAR (charset)),
448 make_number (CHARSET_MAX_CHAR (charset)));
449 if (NILP (function))
450 (*c_function) (arg, range, Qnil);
451 else
452 call2 (function, range, arg);
453 }
454 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
455 {
456 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
457 return;
458 if (CHARSET_ASCII_COMPATIBLE_P (charset))
bbf12bb3 459 {
3263d5a2
KH
460 range = Fcons (make_number (0), make_number (127));
461 if (NILP (function))
462 (*c_function) (arg, range, Qnil);
463 else
464 call2 (function, range, arg);
bbf12bb3 465 }
3263d5a2
KH
466 map_char_table (c_function, function, CHARSET_ENCODER (charset), arg,
467 0, NULL);
468 }
469 else /* i.e. CHARSET_METHOD_PARENT */
470 {
471 int from, to, c;
472 unsigned code;
473 int i, j, k, l;
474 int *code_space = CHARSET_CODE_SPACE (charset);
475 Lisp_Object val;
476
477 range = Fcons (Qnil, Qnil);
478 from = to = -2;
479 for (i = code_space[12]; i <= code_space[13]; i++)
480 for (j = code_space[8]; j <= code_space[9]; j++)
481 for (k = code_space[4]; k <= code_space[5]; k++)
482 for (l = code_space[0]; l <= code_space[1]; l++)
483 {
484 code = (i << 24) | (j << 16) | (k << 8) | l;
485 c = DECODE_CHAR (charset, code);
486 if (c == to + 1)
487 {
488 to++;
489 continue;
490 }
491 if (from >= 0)
492 {
493 if (from < to)
494 {
495 XSETCAR (range, make_number (from));
496 XSETCDR (range, make_number (to));
497 val = range;
498 }
499 else
500 val = make_number (from);
501 if (NILP (function))
502 (*c_function) (arg, val, Qnil);
503 else
504 call2 (function, val, arg);
505 }
506 from = to = (c < 0 ? -2 : c);
507 }
508 if (from >= 0)
bbf12bb3 509 {
3263d5a2
KH
510 if (from < to)
511 {
512 XSETCAR (range, make_number (from));
513 XSETCDR (range, make_number (to));
514 val = range;
515 }
516 else
517 val = make_number (from);
518 if (NILP (function))
519 (*c_function) (arg, val, Qnil);
520 else
521 call2 (function, val, arg);
bbf12bb3 522 }
35e623fb 523 }
3263d5a2
KH
524}
525
526DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0,
527 doc: /* Call FUNCTION for each characters in CHARSET.
528FUNCTION is called with three arguments; FROM, TO, and the 3rd optional
529argument ARG.
530FROM and TO indicates a range of character sequence that are contained
531in CHARSET. */)
532 (function, charset, arg)
533 Lisp_Object function, charset, arg;
534{
535 map_charset_chars (NULL, function, charset, arg);
536 return Qnil;
35e623fb 537}
76d7b829
KH
538
539
3263d5a2
KH
540/* Define a charset according to the arguments. The Nth argument is
541 the Nth attribute of the charset (the last attribute `charset-id'
542 is not included). See the docstring of `define-charset' for the
543 detail. */
76d7b829 544
3263d5a2
KH
545DEFUN ("define-charset-internal", Fdefine_charset_internal,
546 Sdefine_charset_internal, charset_arg_max, MANY, 0,
547 doc: /* For internal use only. */)
548 (nargs, args)
549 int nargs;
550 Lisp_Object *args;
76d7b829 551{
3263d5a2
KH
552 /* Charset attr vector. */
553 Lisp_Object attrs;
554 Lisp_Object val;
555 unsigned hash_code;
556 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
557 int i;
558 struct charset charset;
559 int id;
560 int dimension;
561 int new_definition_p;
562 int nchars;
563
564 if (nargs != charset_arg_max)
565 return Fsignal (Qwrong_number_of_arguments,
566 Fcons (intern ("define-charset-internal"),
567 make_number (nargs)));
568
569 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
570
571 CHECK_SYMBOL (args[charset_arg_name]);
572 ASET (attrs, charset_name, args[charset_arg_name]);
573
574 val = args[charset_arg_code_space];
575 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 576 {
3263d5a2
KH
577 int min_byte, max_byte;
578
579 min_byte = XINT (Faref (val, make_number (i * 2)));
580 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
581 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
582 error ("Invalid :code-space value");
583 charset.code_space[i * 4] = min_byte;
584 charset.code_space[i * 4 + 1] = max_byte;
585 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
586 nchars *= charset.code_space[i * 4 + 2];
587 charset.code_space[i * 4 + 3] = nchars;
588 if (max_byte > 0)
589 dimension = i + 1;
590 }
76d7b829 591
3263d5a2
KH
592 val = args[charset_arg_dimension];
593 if (NILP (val))
594 charset.dimension = dimension;
595 else
596 {
597 CHECK_NATNUM (val);
598 charset.dimension = XINT (val);
599 if (charset.dimension < 1 || charset.dimension > 4)
600 args_out_of_range_3 (val, make_number (1), make_number (4));
601 }
602
603 charset.code_linear_p
604 = (charset.dimension == 1
605 || (charset.code_space[2] == 256
606 && (charset.dimension == 2
607 || (charset.code_space[6] == 256
608 && (charset.dimension == 3
609 || charset.code_space[10] == 256)))));
610
611 charset.iso_chars_96 = charset.code_space[2] == 96;
612
613 charset.min_code = (charset.code_space[0]
614 | (charset.code_space[4] << 8)
615 | (charset.code_space[8] << 16)
616 | (charset.code_space[12] << 24));
617 charset.max_code = (charset.code_space[1]
618 | (charset.code_space[5] << 8)
619 | (charset.code_space[9] << 16)
620 | (charset.code_space[13] << 24));
621
622 val = args[charset_arg_invalid_code];
623 if (NILP (val))
624 {
625 if (charset.min_code > 0)
626 charset.invalid_code = 0;
bbf12bb3
KH
627 else
628 {
3263d5a2
KH
629 XSETINT (val, charset.max_code + 1);
630 if (XINT (val) == charset.max_code + 1)
631 charset.invalid_code = charset.max_code + 1;
632 else
633 error ("Attribute :invalid-code must be specified");
76d7b829 634 }
76d7b829 635 }
3263d5a2
KH
636 else
637 {
638 CHECK_NATNUM (val);
639 charset.invalid_code = XFASTINT (val);
640 }
76d7b829 641
3263d5a2
KH
642 val = args[charset_arg_iso_final];
643 if (NILP (val))
644 charset.iso_final = -1;
645 else
646 {
647 CHECK_NUMBER (val);
648 if (XINT (val) < '0' || XINT (val) > 127)
649 error ("Invalid iso-final-char: %d", XINT (val));
650 charset.iso_final = XINT (val);
651 }
652
653 val = args[charset_arg_iso_revision];
654 if (NILP (val))
655 charset.iso_revision = -1;
656 else
4ed46869 657 {
3263d5a2
KH
658 CHECK_NUMBER (val);
659 if (XINT (val) > 63)
660 args_out_of_range (make_number (63), val);
661 charset.iso_revision = XINT (val);
4ed46869 662 }
3263d5a2
KH
663
664 val = args[charset_arg_emacs_mule_id];
665 if (NILP (val))
666 charset.emacs_mule_id = -1;
4ed46869
KH
667 else
668 {
3263d5a2
KH
669 CHECK_NATNUM (val);
670 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
671 error ("Invalid emacs-mule-id: %d", XINT (val));
672 charset.emacs_mule_id = XINT (val);
c83ef371 673 }
6ef23ebb 674
3263d5a2 675 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 676
3263d5a2
KH
677 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
678
679 charset.unified_p = 0;
680
681 bzero (charset.fast_map, sizeof (charset.fast_map));
682
683 if (! NILP (args[charset_arg_code_offset]))
684 {
685 val = args[charset_arg_code_offset];
686 CHECK_NUMBER (val);
687
688 charset.method = CHARSET_METHOD_OFFSET;
689 charset.code_offset = XINT (val);
690
691 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
692 charset.min_char = i + charset.code_offset;
693 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
694 charset.max_char = i + charset.code_offset;
695 if (charset.max_char > MAX_CHAR)
696 error ("Unsupported max char: %d", charset.max_char);
697
698 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
699 i += 128)
700 CHARSET_FAST_MAP_SET (i, charset.fast_map);
701 for (; i <= charset.max_char; i += 0x1000)
702 CHARSET_FAST_MAP_SET (i, charset.fast_map);
703 }
704 else if (! NILP (args[charset_arg_map]))
705 {
706 val = args[charset_arg_map];
707 ASET (attrs, charset_map, val);
708 if (STRINGP (val))
709 val = load_charset_map (&charset, val);
710 CHECK_VECTOR (val);
711 parse_charset_map (&charset, val, 0);
712 charset.method = CHARSET_METHOD_MAP_DEFERRED;
713 }
714 else if (! NILP (args[charset_arg_parents]))
715 {
716 val = args[charset_arg_parents];
717 CHECK_LIST (val);
718 charset.method = CHARSET_METHOD_INHERIT;
719 val = Fcopy_sequence (val);
720 ASET (attrs, charset_parents, val);
721
722 charset.min_char = MAX_CHAR;
723 charset.max_char = 0;
724 for (; ! NILP (val); val = Fcdr (val))
4ed46869 725 {
3263d5a2
KH
726 Lisp_Object elt, car_part, cdr_part;
727 int this_id, offset;
728 struct charset *this_charset;
729
730 elt = Fcar (val);
731 if (CONSP (elt))
732 {
733 car_part = XCAR (elt);
734 cdr_part = XCDR (elt);
735 CHECK_CHARSET_GET_ID (car_part, this_id);
736 CHECK_NUMBER (cdr_part);
737 offset = XINT (cdr_part);
738 }
739 else
4ed46869 740 {
3263d5a2
KH
741 CHECK_CHARSET_GET_ID (elt, this_id);
742 offset = 0;
4ed46869 743 }
3263d5a2
KH
744 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
745
746 this_charset = CHARSET_FROM_ID (this_id);
747 if (charset.min_char > this_charset->min_char)
748 charset.min_char = this_charset->min_char;
749 if (charset.max_char < this_charset->max_char)
750 charset.max_char = this_charset->max_char;
751 for (i = 0; i < 190; i++)
752 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 753 }
4ed46869 754 }
3263d5a2
KH
755 else
756 error ("None of :code-offset, :map, :parents are specified");
4ed46869 757
3263d5a2
KH
758 val = args[charset_arg_unify_map];
759 if (! NILP (val) && !STRINGP (val))
760 CHECK_VECTOR (val);
761 ASET (attrs, charset_unify_map, val);
4ed46869 762
3263d5a2
KH
763 CHECK_LIST (args[charset_arg_plist]);
764 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 765
3263d5a2
KH
766 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
767 &hash_code);
768 if (charset.hash_index >= 0)
769 {
770 new_definition_p = 0;
771 HASH_VALUE (hash_table, charset.hash_index) = attrs;
772 }
1a45ff10 773 else
3263d5a2
KH
774 {
775 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
776 hash_code);
777 if (charset_table_used == charset_table_size)
778 {
779 charset_table_size += 256;
780 charset_table
781 = ((struct charset *)
782 xrealloc (charset_table,
783 sizeof (struct charset) * charset_table_size));
784 }
785 id = charset_table_used++;
786 ASET (attrs, charset_id, make_number (id));
787 new_definition_p = 1;
788 }
4ed46869 789
4ed46869 790
3263d5a2
KH
791 charset.id = id;
792 charset_table[id] = charset;
793
794 if (charset.iso_final >= 0)
4ed46869 795 {
3263d5a2
KH
796 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
797 charset.iso_final) = id;
798 if (new_definition_p)
799 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
800 Fcons (make_number (id), Qnil));
4ed46869 801 }
3263d5a2
KH
802
803 if (charset.emacs_mule_id >= 0)
4ed46869 804 {
3263d5a2
KH
805 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
806 if (new_definition_p)
807 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
808 Fcons (make_number (id), Qnil));
4ed46869
KH
809 }
810
3263d5a2
KH
811 if (new_definition_p)
812 {
813 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
814 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
815 Fcons (make_number (id), Qnil));
816 }
4ed46869 817
3263d5a2 818 return Qnil;
4ed46869
KH
819}
820
3263d5a2
KH
821
822DEFUN ("define-charset-alias", Fdefine_charset_alias,
823 Sdefine_charset_alias, 2, 2, 0,
824 doc: /* Define ALIAS as an alias for charset CHARSET. */)
825 (alias, charset)
826 Lisp_Object alias, charset;
4ed46869 827{
3263d5a2
KH
828 Lisp_Object attr;
829
830 CHECK_CHARSET_GET_ATTR (charset, attr);
831 Fputhash (alias, attr, Vcharset_hash_table);
832 return Qnil;
833}
4ed46869 834
4ed46869 835
3263d5a2
KH
836DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
837 doc: /* Return the primary charset. */)
838 ()
839{
840 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
841}
4ed46869 842
4ed46869 843
3263d5a2
KH
844DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
845 1, 1, 0,
846 doc: /* Set the primary charset to CHARSET. */)
847 (charset)
848 Lisp_Object charset;
849{
850 int id;
851
852 CHECK_CHARSET_GET_ID (charset, id);
853 charset_primary = id;
4ed46869
KH
854 return Qnil;
855}
856
3263d5a2
KH
857
858DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
859 doc: /* Return a property list of CHARSET. */)
860 (charset)
861 Lisp_Object charset;
862{
863 Lisp_Object attrs;
864
865 CHECK_CHARSET_GET_ATTR (charset, attrs);
866 return CHARSET_ATTR_PLIST (attrs);
867}
868
869
870DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
871 doc: /* Set CHARSET's property list to PLIST. */)
872 (charset, plist)
873 Lisp_Object charset, plist;
874{
875 Lisp_Object attrs;
876
877 CHECK_CHARSET_GET_ATTR (charset, attrs);
878 CHARSET_ATTR_PLIST (attrs) = plist;
879 return plist;
880}
881
882
883DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
884 doc: /* Unify characters of CHARSET with Unicode. */)
885 (charset, unify_map)
886 Lisp_Object charset, unify_map;
8a73a704 887{
3263d5a2
KH
888 int id;
889 struct charset *cs;
890
891 CHECK_CHARSET_GET_ID (charset, id);
892 cs = CHARSET_FROM_ID (id);
893 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
894 load_charset (cs);
895 if (CHARSET_UNIFIED_P (cs)
896 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs)))
897 return Qnil;
898 CHARSET_UNIFIED_P (cs) = 0;
899 if (NILP (unify_map))
900 unify_map = CHARSET_UNIFY_MAP (cs);
901 if (STRINGP (unify_map))
902 unify_map = load_charset_map (cs, unify_map);
903 parse_charset_map (cs, unify_map, 2);
904 CHARSET_UNIFIED_P (cs) = 1;
905 return Qnil;
8a73a704
KH
906}
907
3fac5a51
KH
908DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
909 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2
KH
910 doc: /*
911Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
912DIMENSION is the number of bytes to represent a character: 1 or 2.
913CHARS is the number of characters in a dimension: 94 or 96.
914
915This final char is for private use, thus the range is `0' (48) .. `?' (63).
3263d5a2 916If there's no unused final char for the attrified kind of charset,
fdb82f93
PJ
917return nil. */)
918 (dimension, chars)
3fac5a51
KH
919 Lisp_Object dimension, chars;
920{
921 int final_char;
922
b7826503
PJ
923 CHECK_NUMBER (dimension);
924 CHECK_NUMBER (chars);
3263d5a2
KH
925 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
926 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 927 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 928 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 929 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
930 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
931 break;
3fac5a51
KH
932 return (final_char <= '?' ? make_number (final_char) : Qnil);
933}
934
3263d5a2
KH
935static void
936check_iso_charset_parameter (dimension, chars, final_char)
937 Lisp_Object dimension, chars, final_char;
4ed46869 938{
3263d5a2
KH
939 CHECK_NATNUM (dimension);
940 CHECK_NATNUM (chars);
941 CHECK_NATNUM (final_char);
4ed46869 942
3263d5a2
KH
943 if (XINT (dimension) > 3)
944 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
945 if (XINT (chars) != 94 && XINT (chars) != 96)
946 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 947 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 948 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
949}
950
951
952DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
953 4, 4, 0,
954 doc: /*
955Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
956CHARSET should be defined by `defined-charset' in advance. */)
957 (dimension, chars, final_char, charset)
958 Lisp_Object dimension, chars, final_char, charset;
959{
960 int id;
4ed46869 961
3263d5a2
KH
962 CHECK_CHARSET_GET_ID (charset, id);
963 check_iso_charset_parameter (dimension, chars, final_char);
964
965 ISO_CHARSET_TABLE (dimension, chars, final_char) = id;
4ed46869
KH
966 return Qnil;
967}
968
3263d5a2 969
2e344af3
KH
970/* Return information about charsets in the text at PTR of NBYTES
971 bytes, which are NCHARS characters. The value is:
f6302ac9 972
cfe34140 973 0: Each character is represented by one byte. This is always
3263d5a2
KH
974 true for a unibyte string. For a multibyte string, true if
975 it contains only ASCII characters.
976
977 1: No charsets other than ascii, eight-bit-control, and
978 latin-1 are found.
1d67c29b 979
3263d5a2
KH
980 2: Otherwise.
981*/
4ed46869
KH
982
983int
3263d5a2
KH
984string_xstring_p (string)
985 Lisp_Object string;
4ed46869 986{
3263d5a2
KH
987 unsigned char *p = XSTRING (string)->data;
988 unsigned char *endp = p + STRING_BYTES (XSTRING (string));
989 struct charset *charset;
990
991 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
992 return 0;
993
994 charset = CHARSET_FROM_ID (charset_iso_8859_1);
995 while (p < endp)
0282eb69 996 {
3263d5a2 997 int c = STRING_CHAR_ADVANCE (p);
2e344af3 998
3263d5a2
KH
999 if (ENCODE_CHAR (charset, c) < 0)
1000 return 2;
0282eb69 1001 }
3263d5a2
KH
1002 return 1;
1003}
05505664 1004
05505664 1005
3263d5a2 1006/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1007
3263d5a2
KH
1008 CHARSETS is a vector. Each element is a cons of CHARSET and
1009 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1010 FOUND-FLAG t (or nil) means that the corresponding charset is
1011 already found (or not yet found).
2e344af3 1012
3263d5a2 1013 It may lookup a translation table TABLE if supplied. */
2e344af3 1014
3263d5a2
KH
1015static void
1016find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
1017 unsigned char *ptr;
1018 int nchars, nbytes;
1019 Lisp_Object charsets, table;
1020{
1021 unsigned char *pend = ptr + nbytes;
1022 int ncharsets = ASIZE (charsets);
1023
1024 if (nchars == nbytes)
1025 return;
1026
1027 while (ptr < pend)
1028 {
1029 int c = STRING_CHAR_ADVANCE (ptr);
1030 int i;
1031 int all_found = 1;
1032 Lisp_Object elt;
1033
1034 if (!NILP (table))
1035 c = translate_char (table, c);
1036 for (i = 0; i < ncharsets; i++)
1037 {
1038 elt = AREF (charsets, i);
1039 if (NILP (XCDR (elt)))
1040 {
1041 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1042
1043 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1044 XCDR (elt) = Qt;
1045 else
1046 all_found = 0;
1047 }
4ed46869 1048 }
3263d5a2
KH
1049 if (all_found)
1050 break;
4ed46869 1051 }
4ed46869
KH
1052}
1053
3263d5a2 1054
4ed46869 1055DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1056 2, 3, 0,
fdb82f93
PJ
1057 doc: /* Return a list of charsets in the region between BEG and END.
1058BEG and END are buffer positions.
1059Optional arg TABLE if non-nil is a translation table to look up.
1060
1061If the region contains invalid multibyte characters,
1062`unknown' is included in the returned list.
1063
1064If the current buffer is unibyte, the returned list may contain
1065only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1066 (beg, end, table)
23d2a7f1 1067 Lisp_Object beg, end, table;
4ed46869 1068{
3263d5a2 1069 Lisp_Object charsets;
6ae1f27e 1070 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1071 Lisp_Object val;
1072
1073 validate_region (&beg, &end);
1074 from = XFASTINT (beg);
1075 stop = to = XFASTINT (end);
6ae1f27e 1076
4ed46869 1077 if (from < GPT && GPT < to)
6ae1f27e
RS
1078 {
1079 stop = GPT;
1080 stop_byte = GPT_BYTE;
1081 }
1082 else
1083 stop_byte = CHAR_TO_BYTE (stop);
1084
1085 from_byte = CHAR_TO_BYTE (from);
1086
3263d5a2
KH
1087 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1088 for (i = 0; i < charset_table_used; i++)
1089 ASET (charsets, i, Fcons (make_number (i), Qnil));
1090
4ed46869
KH
1091 while (1)
1092 {
3263d5a2
KH
1093 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1094 stop_byte - from_byte, charsets, table);
4ed46869 1095 if (stop < to)
6ae1f27e
RS
1096 {
1097 from = stop, from_byte = stop_byte;
1098 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1099 }
4ed46869
KH
1100 else
1101 break;
1102 }
6ae1f27e 1103
4ed46869 1104 val = Qnil;
3263d5a2
KH
1105 for (i = charset_table_used - 1; i >= 0; i--)
1106 if (!NILP (XCDR (AREF (charsets, i))))
1107 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1108 return val;
1109}
1110
1111DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1112 1, 2, 0,
fdb82f93
PJ
1113 doc: /* Return a list of charsets in STR.
1114Optional arg TABLE if non-nil is a translation table to look up.
1115
1116If the string contains invalid multibyte characters,
1117`unknown' is included in the returned list.
1118
1119If STR is unibyte, the returned list may contain
3263d5a2 1120only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1121 (str, table)
23d2a7f1 1122 Lisp_Object str, table;
4ed46869 1123{
3263d5a2 1124 Lisp_Object charsets;
4ed46869
KH
1125 int i;
1126 Lisp_Object val;
1127
b7826503 1128 CHECK_STRING (str);
87b089ad 1129
3263d5a2
KH
1130 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1131 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1132 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1133
4ed46869 1134 val = Qnil;
3263d5a2
KH
1135 for (i = charset_table_used - 1; i >= 0; i--)
1136 if (!NILP (XCDR (AREF (charsets, i))))
1137 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1138 return val;
1139}
2e344af3 1140
4ed46869 1141\f
3263d5a2
KH
1142
1143/* Return a character correponding to the code-point CODE of
1144 CHARSET. */
1145
1146int
1147decode_char (charset, code)
1148 struct charset *charset;
1149 unsigned code;
4ed46869 1150{
3263d5a2
KH
1151 int c, char_index;
1152 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1153
3263d5a2
KH
1154 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1155 return -1;
4ed46869 1156
3263d5a2 1157 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1158 {
3263d5a2
KH
1159 load_charset (charset);
1160 method = CHARSET_METHOD (charset);
ac4137cc 1161 }
4ed46869 1162
3263d5a2 1163 if (method == CHARSET_METHOD_INHERIT)
2e344af3 1164 {
3263d5a2 1165 Lisp_Object parents;
4ed46869 1166
3263d5a2
KH
1167 parents = CHARSET_PARENTS (charset);
1168 c = -1;
1169 for (; CONSP (parents); parents = XCDR (parents))
1170 {
1171 int id = XINT (XCAR (XCAR (parents)));
1172 int code_offset = XINT (XCDR (XCAR (parents)));
1173 unsigned this_code = code + code_offset;
9d3d8cba 1174
3263d5a2
KH
1175 charset = CHARSET_FROM_ID (id);
1176 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1177 break;
1178 }
1179 }
1180 else
ac4137cc 1181 {
3263d5a2
KH
1182 char_index = CODE_POINT_TO_INDEX (charset, code);
1183
1184 if (method == CHARSET_METHOD_MAP)
ac4137cc 1185 {
3263d5a2
KH
1186 Lisp_Object decoder;
1187
1188 decoder = CHARSET_DECODER (charset);
1189 if (! VECTORP (decoder))
1190 return -1;
1191 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1192 }
1193 else
1194 {
3263d5a2 1195 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1196 }
1197 }
9d3d8cba 1198
3263d5a2
KH
1199 if (CHARSET_UNIFIED_P (charset)
1200 && c >= 0)
1201 MAYBE_UNIFY_CHAR (c);
d2665018 1202
3263d5a2 1203 return c;
d2665018
KH
1204}
1205
1bcc1567 1206
3263d5a2
KH
1207/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1208 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1bcc1567 1209
3263d5a2
KH
1210unsigned
1211encode_char (charset, c)
1212 struct charset *charset;
9b6a601f
KH
1213 int c;
1214{
3263d5a2
KH
1215 unsigned code;
1216 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1217
3263d5a2 1218 if (CHARSET_UNIFIED_P (charset))
4ed46869 1219 {
3263d5a2
KH
1220 Lisp_Object deunifier;
1221 int deunified;
4ed46869 1222
3263d5a2
KH
1223 deunifier = CHARSET_DEUNIFIER (charset);
1224 if (! CHAR_TABLE_P (deunifier))
1225 {
1226 Funify_charset (CHARSET_NAME (charset), Qnil);
1227 deunifier = CHARSET_DEUNIFIER (charset);
1228 }
1229 deunified = XINT (CHAR_TABLE_REF (deunifier, c));
1230 if (deunified > 0)
1231 c = deunified;
4ed46869 1232 }
beeedaad 1233
3263d5a2
KH
1234 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1235 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1236 return CHARSET_INVALID_CODE (charset);
beeedaad 1237
3263d5a2 1238 if (method == CHARSET_METHOD_INHERIT)
859f2b3c 1239 {
3263d5a2 1240 Lisp_Object parents;
859f2b3c 1241
3263d5a2
KH
1242 parents = CHARSET_PARENTS (charset);
1243 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1244 {
3263d5a2
KH
1245 int id = XINT (XCAR (XCAR (parents)));
1246 int code_offset = XINT (XCDR (XCAR (parents)));
1247 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1248
3263d5a2
KH
1249 code = ENCODE_CHAR (this_charset, c);
1250 if (code != CHARSET_INVALID_CODE (this_charset)
1251 && (code_offset < 0 || code >= code_offset))
1252 {
1253 code -= code_offset;
1254 if (CODE_POINT_TO_INDEX (charset, code) >= 0)
1255 return code;
1256 }
beeedaad 1257 }
3263d5a2
KH
1258 return CHARSET_INVALID_CODE (charset);
1259 }
99529c2c 1260
3263d5a2 1261 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1262 {
3263d5a2
KH
1263 load_charset (charset);
1264 method = CHARSET_METHOD (charset);
859f2b3c 1265 }
beeedaad 1266
3263d5a2 1267 if (method == CHARSET_METHOD_MAP)
3f62427c 1268 {
3263d5a2 1269 Lisp_Object encoder;
beeedaad 1270 Lisp_Object val;
beeedaad 1271
3263d5a2
KH
1272 encoder = CHARSET_ENCODER (charset);
1273 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1274 return CHARSET_INVALID_CODE (charset);
1275 val = CHAR_TABLE_REF (encoder, c);
1276 if (CONSP (val))
1277 code = (XINT (XCAR (val)) << 16) | XINT (XCDR (val));
3f62427c 1278 else
3263d5a2
KH
1279 code = XINT (val);
1280 }
1281 else
beeedaad 1282 {
3263d5a2
KH
1283 code = c - CHARSET_CODE_OFFSET (charset);
1284 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1285 }
beeedaad 1286
3263d5a2 1287 return code;
3f62427c
KH
1288}
1289
4ed46869 1290
3263d5a2
KH
1291DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1292 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1293Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1294
3263d5a2
KH
1295CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1296
1297Optional argument RESTRICTION specifies a way to map the pair of CCS
1298and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1299 (charset, code_point, restriction)
1300 Lisp_Object charset, code_point, restriction;
4ed46869 1301{
3263d5a2
KH
1302 int c, id;
1303 unsigned code;
1304 struct charset *charsetp;
4ed46869 1305
3263d5a2
KH
1306 CHECK_CHARSET_GET_ID (charset, id);
1307 if (CONSP (code_point))
1308 {
1309 CHECK_NATNUM (XCAR (code_point));
1310 CHECK_NATNUM (XCDR (code_point));
1311 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCAR (code_point)));
1312 }
1313 else
1314 {
1315 CHECK_NATNUM (code_point);
1316 code = XINT (code_point);
1317 }
1318 charsetp = CHARSET_FROM_ID (id);
1319 c = DECODE_CHAR (charsetp, code);
1320 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1321}
1322
046b1f03 1323
3263d5a2
KH
1324DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1325 doc: /* Encode the character CH into a code-point of CHARSET.
1326Return nil if CHARSET doesn't include CH.
17e7ef1b 1327
3263d5a2
KH
1328Optional argument RESTRICTION specifies a way to map CHAR to a
1329code-point in CCS. Currently not supported and just ignored. */)
1330 (ch, charset, restriction)
1331 Lisp_Object ch, charset, restriction;
1332{
1333 int c, id;
1334 unsigned code;
1335 struct charset *charsetp;
046b1f03 1336
3263d5a2
KH
1337 CHECK_CHARSET_GET_ID (charset, id);
1338 CHECK_NATNUM (ch);
1339 c = XINT (ch);
1340 charsetp = CHARSET_FROM_ID (id);
1341 code = ENCODE_CHAR (charsetp, ch);
1342 if (code == CHARSET_INVALID_CODE (charsetp))
1343 return Qnil;
1344 if (code > 0x7FFFFFF)
1345 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1346 return make_number (code);
6ae1f27e 1347}
9036eb45 1348
87b089ad 1349
3263d5a2
KH
1350DEFUN ("make-char", Fmake_char, Smake_char, 1, 4, 0,
1351 doc: /* Return a character of CHARSET whose position code is CODE.
a8a35e61 1352
3263d5a2
KH
1353If dimension of CHARSET is two, and the third optional arg CODE2 is
1354non-nil, CODE actually specifies the first byte of the position code,
1355and CODE2 specifies the second byte.
046b1f03 1356
3263d5a2
KH
1357If dimension of CHARSET is three, and the third optional arg CODE2 and
1358the fourth optional arg CODE3 are both non-nil, CODE actually
1359specifies the first byte of the position code, CODE2 the second byte,
1360and CODE3 the third byte. */)
1361 (charset, code, code2, code3)
1362 Lisp_Object charset, code, code2, code3;
87b089ad 1363{
3263d5a2
KH
1364 int id, dimension;
1365 struct charset *charsetp;
1366 unsigned c;
87b089ad 1367
3263d5a2
KH
1368 CHECK_CHARSET_GET_ID (charset, id);
1369 charsetp = CHARSET_FROM_ID (id);
87b089ad 1370
3263d5a2
KH
1371 if (NILP (code))
1372 code = make_number (CHARSET_MIN_CODE (charsetp));
1373 else
87b089ad 1374 {
3263d5a2
KH
1375 CHECK_NATNUM (code);
1376 dimension = CHARSET_DIMENSION (charsetp);
1377
1378 if (!NILP (code2))
1379 {
1380 CHECK_NATNUM (code2);
1381 if (dimension == 3)
1382 CHECK_NATNUM (code3);
1383 }
2e344af3
KH
1384 }
1385
3263d5a2
KH
1386 if (dimension == 1 || NILP (code2))
1387 c = XFASTINT (code);
1388 else if (dimension == 2)
1389 c = (XFASTINT (code) << 8) | XFASTINT (code2);
1390 else if (dimension == 3)
1391 c = (XFASTINT (code) << 16) | (XFASTINT (code2) << 8) | XFASTINT (code3);
1392
1393 c = DECODE_CHAR (charsetp, c);
1394 return make_number (c);
2e344af3
KH
1395}
1396
3263d5a2
KH
1397
1398/* Return the first charset in CHARSET_LIST that contains C.
1399 CHARSET_LIST is a list of charset IDs. If it is nil, use
1400 Vcharset_ordered_list. */
1401
1402struct charset *
1403char_charset (c, charset_list, code_return)
1404 int c;
1405 Lisp_Object charset_list;
1406 unsigned *code_return;
2e344af3 1407{
3263d5a2
KH
1408 if (NILP (charset_list))
1409 charset_list = Vcharset_ordered_list;
2e344af3 1410
3263d5a2 1411 while (CONSP (charset_list))
2e344af3 1412 {
3263d5a2
KH
1413 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1414 unsigned code = ENCODE_CHAR (charset, c);
1415
1416 if (code != CHARSET_INVALID_CODE (charset))
1417 {
1418 if (code_return)
1419 *code_return = code;
1420 return charset;
1421 }
1422 charset_list = XCDR (charset_list);
2e344af3 1423 }
3263d5a2 1424 return NULL;
2e344af3
KH
1425}
1426
2e344af3 1427
3263d5a2
KH
1428DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1429 doc: /*Return list of charset and one or two position-codes of CHAR.
1430If CHAR is invalid as a character code,
1431return a list of symbol `unknown' and CHAR. */)
1432 (ch)
1433 Lisp_Object ch;
2e344af3 1434{
3263d5a2
KH
1435 struct charset *charset;
1436 int c, dimension;
1437 unsigned code;
1438 Lisp_Object val;
1439
1440 CHECK_CHARACTER (ch);
1441 c = XFASTINT (ch);
1442 charset = CHAR_CHARSET (c);
1443 if (! charset)
1444 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1445
1446 code = ENCODE_CHAR (charset, c);
1447 if (code == CHARSET_INVALID_CODE (charset))
1448 abort ();
1449 dimension = CHARSET_DIMENSION (charset);
1450 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1451 : dimension == 2 ? Fcons (make_number (code >> 8),
1452 Fcons (make_number (code & 0xFF), Qnil))
1453 : Fcons (make_number (code >> 16),
1454 Fcons (make_number ((code >> 8) & 0xFF),
1455 Fcons (make_number (code & 0xFF), Qnil))));
1456 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1457}
87b089ad 1458
740f080d 1459
3263d5a2
KH
1460DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1461 doc: /* Return the charset of highest priority that contains CHAR. */)
1462 (ch)
1463 Lisp_Object ch;
740f080d 1464{
3263d5a2 1465 struct charset *charset;
740f080d 1466
3263d5a2
KH
1467 CHECK_CHARACTER (ch);
1468 charset = CHAR_CHARSET (XINT (ch));
1469 return (CHARSET_NAME (charset));
740f080d
KH
1470}
1471
2e344af3 1472
3263d5a2
KH
1473DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1474 doc: /*
1475Return charset of a character in the current buffer at position POS.
1476If POS is nil, it defauls to the current point.
1477If POS is out of range, the value is nil. */)
1478 (pos)
1479 Lisp_Object pos;
2e344af3 1480{
3263d5a2
KH
1481 Lisp_Object ch;
1482 struct charset *charset;
1483
1484 ch = Fchar_after (pos);
1485 if (! INTEGERP (ch))
1486 return ch;
1487 charset = CHAR_CHARSET (XINT (ch));
1488 return (CHARSET_NAME (charset));
87b089ad
RS
1489}
1490
2e344af3 1491
3263d5a2
KH
1492DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1493 doc: /*
1494Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1495
1496ISO 2022's designation sequence (escape sequence) distinguishes charsets
1497by their DIMENSION, CHARS, and FINAL-CHAR,
1498where as Emacs distinguishes them by charset symbol.
1499See the documentation of the function `charset-info' for the meanings of
1500DIMENSION, CHARS, and FINAL-CHAR. */)
1501 (dimension, chars, final_char)
1502 Lisp_Object dimension, chars, final_char;
2e344af3 1503{
3263d5a2 1504 int id;
2e344af3 1505
3263d5a2
KH
1506 check_iso_charset_parameter (dimension, chars, final_char);
1507 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1508 XFASTINT (final_char));
1509 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1510}
1511
3263d5a2
KH
1512
1513DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1514 0, 0, 0,
1515 doc: /*
1516Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1517 ()
4ed46869 1518{
53316e55 1519 int i;
3263d5a2
KH
1520 struct charset *charset;
1521 Lisp_Object attrs;
4ed46869 1522
3263d5a2 1523 for (i = 0; i < charset_table_used; i++)
4ed46869 1524 {
3263d5a2
KH
1525 charset = CHARSET_FROM_ID (i);
1526 attrs = CHARSET_ATTRIBUTES (charset);
1527
1528 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1529 {
1530 CHARSET_ATTR_DECODER (attrs) = Qnil;
1531 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1532 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1533 }
1534
1535 if (CHARSET_UNIFIED_P (charset))
1536 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1537 }
1538
3263d5a2 1539 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1540 {
3263d5a2
KH
1541 Foptimize_char_table (Vchar_unified_charset_table);
1542 Vchar_unify_table = Vchar_unified_charset_table;
1543 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1544 }
1545
3263d5a2 1546 return Qnil;
4ed46869
KH
1547}
1548
4ed46869 1549\f
3263d5a2
KH
1550void
1551init_charset ()
4ed46869 1552{
4ed46869 1553
4ed46869
KH
1554}
1555
4ed46869 1556
dfcf069d 1557void
4ed46869
KH
1558init_charset_once ()
1559{
1560 int i, j, k;
1561
3263d5a2
KH
1562 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1563 for (j = 0; j < ISO_MAX_CHARS; j++)
1564 for (k = 0; k < ISO_MAX_FINAL; k++)
1565 iso_charset_table[i][j][k] = -1;
1566
1567 for (i = 0; i < 255; i++)
1568 emacs_mule_charset[i] = NULL;
4ed46869 1569
3263d5a2
KH
1570#if 0
1571 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1572 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1573
1574 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1575
1576 /* Intern this now in case it isn't already done.
1577 Setting this variable twice is harmless.
1578 But don't staticpro it here--that is done in alloc.c. */
1579 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1580
3263d5a2
KH
1581 /* Now we are ready to set up this property, so we can create syntax
1582 tables. */
1583 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1584#endif
4ed46869
KH
1585}
1586
1587#ifdef emacs
1588
dfcf069d 1589void
4ed46869
KH
1590syms_of_charset ()
1591{
3263d5a2
KH
1592 char *p;
1593
1594 DEFSYM (Qcharsetp, "charsetp");
1595
1596 DEFSYM (Qascii, "ascii");
1597 DEFSYM (Qunicode, "unicode");
1598 DEFSYM (Qeight_bit_control, "eight-bit-control");
1599 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1600 DEFSYM (Qiso_8859_1, "iso-8859-1");
1601
1602 DEFSYM (Qgl, "gl");
1603 DEFSYM (Qgr, "gr");
1604
1605 p = (char *) xmalloc (30000);
1606
1607 staticpro (&Vcharset_ordered_list);
1608 Vcharset_ordered_list = Qnil;
1609
1610 staticpro (&Viso_2022_charset_list);
1611 Viso_2022_charset_list = Qnil;
1612
1613 staticpro (&Vemacs_mule_charset_list);
1614 Vemacs_mule_charset_list = Qnil;
1615
1616 staticpro (&Vcharset_hash_table);
1617 Vcharset_hash_table = Fmakehash (Qeq);
1618
1619 charset_table_size = 128;
1620 charset_table = ((struct charset *)
1621 xmalloc (sizeof (struct charset) * charset_table_size));
1622 charset_table_used = 0;
1623
1624 staticpro (&Vchar_unified_charset_table);
1625 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
1626
1627 defsubr (&Scharsetp);
1628 defsubr (&Smap_charset_chars);
1629 defsubr (&Sdefine_charset_internal);
1630 defsubr (&Sdefine_charset_alias);
1631 defsubr (&Sprimary_charset);
1632 defsubr (&Sset_primary_charset);
1633 defsubr (&Scharset_plist);
1634 defsubr (&Sset_charset_plist);
1635 defsubr (&Sunify_charset);
3fac5a51 1636 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1637 defsubr (&Sdeclare_equiv_charset);
1638 defsubr (&Sfind_charset_region);
1639 defsubr (&Sfind_charset_string);
3263d5a2
KH
1640 defsubr (&Sdecode_char);
1641 defsubr (&Sencode_char);
4ed46869 1642 defsubr (&Ssplit_char);
3263d5a2 1643 defsubr (&Smake_char);
4ed46869 1644 defsubr (&Schar_charset);
90d7b74e 1645 defsubr (&Scharset_after);
4ed46869 1646 defsubr (&Siso_charset);
3263d5a2
KH
1647 defsubr (&Sclear_charset_maps);
1648
1649 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
1650 doc: /* Directory of charset map files that come with GNU Emacs.
1651The default value is \"\\[data-directory]/charsets\". */);
1652 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
1653 Vdata_directory);
4ed46869
KH
1654
1655 DEFVAR_LISP ("charset-list", &Vcharset_list,
fdb82f93 1656 doc: /* List of charsets ever defined. */);
3263d5a2
KH
1657 Vcharset_list = Qnil;
1658
1659 /* Make the prerequisite charset `ascii' and `unicode'. */
1660 {
1661 Lisp_Object args[charset_arg_max];
1662 Lisp_Object plist[14];
1663 Lisp_Object val;
1664
1665 plist[0] = intern (":name");
1666 plist[1] = args[charset_arg_name] = Qascii;
1667 plist[2] = intern (":dimension");
1668 plist[3] = args[charset_arg_dimension] = make_number (1);
1669 val = Fmake_vector (make_number (8), make_number (0));
1670 ASET (val, 1, make_number (127));
1671 plist[4] = intern (":code-space");
1672 plist[5] = args[charset_arg_code_space] = val;
1673 plist[6] = intern (":iso-final-char");
1674 plist[7] = args[charset_arg_iso_final] = make_number ('B');
1675 args[charset_arg_iso_revision] = Qnil;
1676 plist[8] = intern (":emacs-mule-id");
1677 plist[9] = args[charset_arg_emacs_mule_id] = make_number (0);
1678 plist[10] = intern (":ascii-compatible-p");
1679 plist[11] = args[charset_arg_ascii_compatible_p] = Qt;
1680 args[charset_arg_supplementary_p] = Qnil;
1681 args[charset_arg_invalid_code] = Qnil;
1682 plist[12] = intern (":code-offset");
1683 plist[13] = args[charset_arg_code_offset] = make_number (0);
1684 args[charset_arg_map] = Qnil;
1685 args[charset_arg_parents] = Qnil;
1686 args[charset_arg_unify_map] = Qnil;
1687 /* The actual plist is set by mule-conf.el. */
1688 args[charset_arg_plist] = Flist (14, plist);
1689 Fdefine_charset_internal (charset_arg_max, args);
1690 charset_ascii = CHARSET_SYMBOL_ID (Qascii);
1691
1692 plist[1] = args[charset_arg_name] = Qunicode;
1693 plist[3] = args[charset_arg_dimension] = make_number (3);
1694 val = Fmake_vector (make_number (8), make_number (0));
1695 ASET (val, 1, make_number (255));
1696 ASET (val, 3, make_number (255));
1697 ASET (val, 5, make_number (16));
1698 plist[5] = args[charset_arg_code_space] = val;
1699 plist[7] = args[charset_arg_iso_final] = Qnil;
1700 args[charset_arg_iso_revision] = Qnil;
1701 plist[9] = args[charset_arg_emacs_mule_id] = Qnil;
1702 plist[11] = args[charset_arg_ascii_compatible_p] = Qt;
1703 args[charset_arg_supplementary_p] = Qnil;
1704 args[charset_arg_invalid_code] = Qnil;
1705 plist[13] = args[charset_arg_code_offset] = make_number (0);
1706 args[charset_arg_map] = Qnil;
1707 args[charset_arg_parents] = Qnil;
1708 args[charset_arg_unify_map] = Qnil;
1709 /* The actual plist is set by mule-conf.el. */
1710 args[charset_arg_plist] = Flist (14, plist);
1711 Fdefine_charset_internal (charset_arg_max, args);
1712 charset_unicode = CHARSET_SYMBOL_ID (Qunicode);
1713 }
4ed46869
KH
1714}
1715
1716#endif /* emacs */