(Fdefine_coding_system_internal): Fix previous change.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
04c2f2c5 51/*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
4ed46869 52
3263d5a2 53 A coded character set ("charset" hereafter) is a meaningful
04c2f2c5 54 collection (i.e. language, culture, functionality, etc.) of
3263d5a2 55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
04c2f2c5
DL
56 code, a charset is represented by a symbol. In C code, a charset is
57 represented by its ID number or by a pointer to a struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
04c2f2c5 62 charset_table as a struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
/* List of all charsets. This variable is used only from Emacs
   Lisp. */
Lisp_Object Vcharset_list;

/* Hash table that contains attributes of each charset. Keys are
   charset symbols, and values are vectors of charset attributes. */
Lisp_Object Vcharset_hash_table;

/* Table of struct charset. */
struct charset *charset_table;

/* Number of elements allocated for charset_table.  The table is
   enlarged when charset_table_used reaches this value.  */
static int charset_table_size;
/* Number of elements of charset_table in use.  It is also the ID
   given to the next newly defined charset.  */
int charset_table_used;

/* NOTE(review): presumably the symbol `charsetp'; its initialization
   is not visible in this part of the file.  */
Lisp_Object Qcharsetp;

/* Special charset symbols. */
Lisp_Object Qascii;
Lisp_Object Qeight_bit_control;
Lisp_Object Qeight_bit_graphic;
Lisp_Object Qiso_8859_1;
Lisp_Object Qunicode;

/* The corresponding charsets. */
int charset_ascii;
int charset_8_bit_control;
int charset_8_bit_graphic;
int charset_iso_8859_1;
int charset_unicode;

/* Value of charset attribute `charset-iso-plane'. */
Lisp_Object Qgl, Qgr;

/* The primary charset. It is a charset of unibyte characters. */
int charset_primary;

/* List of charsets ordered by the priority. */
Lisp_Object Vcharset_ordered_list;

/* List of iso-2022 charsets. */
Lisp_Object Viso_2022_charset_list;

/* List of emacs-mule charsets. */
Lisp_Object Vemacs_mule_charset_list;

/* Table indexed by emacs-mule ID that gives the corresponding
   charset.  Elements are set by `define-charset-internal'.  */
struct charset *emacs_mule_charset[256];

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL-CHAR) to Emacs' charset.  Elements are charset
   IDs; a negative element is treated as unused.  */
int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];

/* Directory in which charset map files are searched for.  */
Lisp_Object Vcharset_map_directory;

/* If this is a char-table, loading a unification map records in it,
   for each unified character, the name of the charset the character
   was unified from.  */
Lisp_Object Vchar_unified_charset_table;
120
/* Convert the code-point CODE of CHARSET to the character index.

   If the code space of CHARSET is linear, the index is simply the
   offset of CODE from CHARSET->min_code (no validation is done).
   Otherwise each of the four bytes of CODE is checked against
   CHARSET->code_space_mask, and the value is -1 if any byte is out of
   the code space.  CODE is evaluated more than once.  */

#define CODE_POINT_TO_INDEX(charset, code)                              \
  ((charset)->code_linear_p                                             \
   ? (code) - (charset)->min_code                                       \
   : (((charset)->code_space_mask[(code) >> 24] & 0x8)                  \
      && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)      \
      && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)       \
      && ((charset)->code_space_mask[(code) & 0xFF] & 0x1))             \
   ? (((((code) >> 24) - (charset)->code_space[12])                     \
       * (charset)->code_space[11])                                     \
      + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])           \
         * (charset)->code_space[7])                                    \
      + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])            \
         * (charset)->code_space[3])                                    \
      + (((code) & 0xFF) - (charset)->code_space[0]))                   \
   : -1)


/* Convert the character index IDX to code-point CODE for CHARSET.
   It is assumed that IDX is in a valid range.  IDX is evaluated more
   than once.  */

#define INDEX_TO_CODE_POINT(charset, idx)                                    \
  ((charset)->code_linear_p                                                  \
   ? (idx) + (charset)->min_code                                             \
   : (((charset)->code_space[0] + (idx) % (charset)->code_space[2])          \
      | (((charset)->code_space[4]                                           \
          + ((idx) / (charset)->code_space[3] % (charset)->code_space[6]))   \
         << 8)                                                               \
      | (((charset)->code_space[8]                                           \
          + ((idx) / (charset)->code_space[7] % (charset)->code_space[10]))  \
         << 16)                                                              \
      /* The most significant byte may be 0x80 or more; shift it as an      \
         unsigned value so that the shift cannot overflow a signed int.  */ \
      | ((unsigned) ((charset)->code_space[12]                               \
                     + ((idx) / (charset)->code_space[11]))                  \
         << 24)))
4ed46869 153
3263d5a2 154\f
4ed46869 155
/* Set to 1 to warn that a charset map is loaded and thus a buffer
   text and a string data may be relocated. */
int charset_map_loaded;

/* One chunk of a chain of charset map entries.  Each entry maps the
   range of code-points FROM..TO to consecutive characters starting
   at C.  A chunk holds 0x10000 entries; entry number N of a chain is
   found in chunk N / 0x10000, which is reached by following NEXT.
   NOTE(review): one chunk is large (0x10000 entries), and the
   loaders in this file allocate chunks with alloca.  */
struct charset_map_entries
{
  struct {
    unsigned from, to;
    int c;
  } entry[0x10000];
  struct charset_map_entries *next;
};
168
169/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 170
3263d5a2 171 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 172
3263d5a2
KH
173 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
174 CHARSET->decoder, and CHARSET->encoder.
93bcb785 175
3263d5a2
KH
176 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
177 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
178 setup it too. */
4ed46869 179
3263d5a2 180static void
e9ce014c 181load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 182 struct charset *charset;
e9ce014c
KH
183 struct charset_map_entries *entries;
184 int n_entries;
3263d5a2 185 int control_flag;
4ed46869 186{
3263d5a2 187 Lisp_Object vec, table;
3263d5a2
KH
188 unsigned max_code = CHARSET_MAX_CODE (charset);
189 int ascii_compatible_p = charset->ascii_compatible_p;
190 int min_char, max_char, nonascii_min_char;
3263d5a2 191 int i;
3263d5a2 192 unsigned char *fast_map = charset->fast_map;
99529c2c 193
e9ce014c
KH
194 if (n_entries <= 0)
195 return;
196
197 if (control_flag > 0)
8ac5a9cc 198 {
3263d5a2
KH
199 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
200 unsigned invalid_code = CHARSET_INVALID_CODE (charset);
6662e69b 201
3263d5a2
KH
202 table = Fmake_char_table (Qnil, make_number (invalid_code));
203 if (control_flag == 1)
204 vec = Fmake_vector (make_number (n), make_number (-1));
205 else if (! CHAR_TABLE_P (Vchar_unify_table))
206 Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1));
6662e69b 207
3263d5a2 208 charset_map_loaded = 1;
2e344af3 209 }
3263d5a2 210
e9ce014c 211 min_char = max_char = entries->entry[0].c;
3263d5a2 212 nonascii_min_char = MAX_CHAR;
e9ce014c 213 for (i = 0; i < n_entries; i++)
2e344af3 214 {
e9ce014c 215 unsigned from, to;
04c2f2c5 216 int c;
e9ce014c 217 int idx = i % 0x10000;
3263d5a2 218
e9ce014c
KH
219 if (i > 0 && idx == 0)
220 entries = entries->next;
221 from = entries->entry[idx].from;
222 to = entries->entry[idx].to;
223 c = entries->entry[idx].c;
3263d5a2 224
3263d5a2
KH
225 if (control_flag < 2)
226 {
e9ce014c 227 if (control_flag == 1)
3263d5a2 228 {
e9ce014c
KH
229 unsigned code = from;
230 int from_index, to_index;
231
232 from_index = CODE_POINT_TO_INDEX (charset, from);
233 if (from == to)
234 to_index = from_index;
235 else
236 to_index = CODE_POINT_TO_INDEX (charset, to);
237 if (from_index < 0 || to_index < 0)
238 continue;
239 if (CHARSET_COMPACT_CODES_P (charset))
240 while (1)
241 {
242 ASET (vec, from_index, make_number (c));
243 CHAR_TABLE_SET (table, c, make_number (code));
244 if (from_index == to_index)
245 break;
246 from_index++, c++;
247 code = INDEX_TO_CODE_POINT (charset, from_index);
248 }
249 else
250 for (; from_index <= to_index; from_index++, c++)
251 {
252 ASET (vec, from_index, make_number (c));
253 CHAR_TABLE_SET (table, c, make_number (from_index));
254 }
3263d5a2 255 }
e9ce014c
KH
256
257 if (c > max_char)
3263d5a2
KH
258 max_char = c;
259 else if (c < min_char)
260 min_char = c;
261 if (ascii_compatible_p && ! ASCII_BYTE_P (c)
262 && c < nonascii_min_char)
263 nonascii_min_char = c;
264
265 CHARSET_FAST_MAP_SET (c, fast_map);
266 }
e9ce014c 267 else
2e344af3 268 {
69f8de5b
KH
269 unsigned code = from;
270 int from_index, to_index;
e9ce014c 271
69f8de5b
KH
272 from_index = CODE_POINT_TO_INDEX (charset, from);
273 if (from == to)
274 to_index = from_index;
275 else
276 to_index = CODE_POINT_TO_INDEX (charset, to);
277 if (from_index < 0 || to_index < 0)
278 continue;
279 while (1)
280 {
281 int c1 = DECODE_CHAR (charset, code);
282
3263d5a2
KH
283 if (c1 >= 0)
284 {
285 CHAR_TABLE_SET (table, c, make_number (c1));
286 CHAR_TABLE_SET (Vchar_unify_table, c1, c);
287 if (CHAR_TABLE_P (Vchar_unified_charset_table))
288 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
289 CHARSET_NAME (charset));
290 }
69f8de5b
KH
291 if (from_index == to_index)
292 break;
293 from_index++, c++;
294 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 295 }
2e344af3 296 }
8ac5a9cc 297 }
3263d5a2
KH
298
299 if (control_flag < 2)
4ed46869 300 {
3263d5a2
KH
301 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
302 ? nonascii_min_char : min_char);
303 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 304 if (control_flag == 1)
4ed46869 305 {
3263d5a2
KH
306 CHARSET_DECODER (charset) = vec;
307 CHARSET_ENCODER (charset) = table;
4ed46869
KH
308 }
309 }
2e344af3 310 else
3263d5a2 311 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
312}
313
12bcae05 314
/* Read a hexadecimal number (preceded by "0x") from the file FP while
   paying attention to comment character '#'.

   Text from '#' to the end of the line is skipped, as is everything
   else preceding the next "0x".  The hexadecimal digits that follow
   are accumulated into the return value, and the character that
   terminates them is pushed back to FP.

   If the end of file is reached before "0x" is found, set *EOF to 1
   and return 0.  Otherwise set *EOF to 0.  */

static unsigned
read_hex (fp, eof)
     FILE *fp;
     int *eof;
{
  int c;
  unsigned n;

  while ((c = getc (fp)) != EOF)
    {
      if (c == '#')
        {
          while ((c = getc (fp)) != EOF && c != '\n')
            ;
        }
      else if (c == '0')
        {
          if ((c = getc (fp)) == EOF || c == 'x')
            break;
          /* Not a "0x" prefix.  Examine the character again, as it
             may itself start a prefix or a comment.  */
          ungetc (c, fp);
        }
    }
  if (c == EOF)
    {
      *eof = 1;
      return 0;
    }
  *eof = 0;

  /* Here, C is always 'x'.  */
  n = 0;
  while ((c = getc (fp)) != EOF && isxdigit (c))
    n = ((n << 4)
         | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
  if (c != EOF)
    ungetc (c, fp);
  return n;
}
12bcae05 356
537efd8d 357
3263d5a2 358/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
359 Each line of MAPFILE has this form
360 0xAAAA 0xCCCC
361 where 0xAAAA is a code-point and 0xCCCC is the corresponding
362 character code, or this form
363 0xAAAA-0xBBBB 0xCCCC
364 where 0xAAAA and 0xBBBB are code-points specifying a range, and
365 0xCCCC is the first character code of the range.
366
3263d5a2
KH
367 The returned vector has this form:
368 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
369 where CODE1 is a code-point or a cons of code-points specifying a
370 range. */
4ed46869 371
c449997d
KH
372extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
373
e9ce014c
KH
374static void
375load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
376 struct charset *charset;
377 Lisp_Object mapfile;
e9ce014c 378 int control_flag;
4ed46869 379{
e9ce014c
KH
380 unsigned min_code = CHARSET_MIN_CODE (charset);
381 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
382 int fd;
383 FILE *fp;
3263d5a2
KH
384 int eof;
385 Lisp_Object suffixes;
e9ce014c
KH
386 struct charset_map_entries *head, *entries;
387 int n_entries;
4ed46869 388
3263d5a2
KH
389 suffixes = Fcons (build_string (".map"),
390 Fcons (build_string (".TXT"), Qnil));
4ed46869 391
3263d5a2
KH
392 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
393 NULL, 0);
394 if (fd < 0
395 || ! (fp = fdopen (fd, "r")))
396 {
397 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 398 return;
3263d5a2 399 }
4ed46869 400
e9ce014c
KH
401 head = entries = ((struct charset_map_entries *)
402 alloca (sizeof (struct charset_map_entries)));
403 n_entries = 0;
3263d5a2
KH
404 eof = 0;
405 while (1)
406 {
e9ce014c
KH
407 unsigned from, to;
408 int c;
409 int idx;
4ed46869 410
e9ce014c 411 from = read_hex (fp, &eof);
3263d5a2
KH
412 if (eof)
413 break;
e9ce014c
KH
414 if (getc (fp) == '-')
415 to = read_hex (fp, &eof);
416 else
417 to = from;
418 c = (int) read_hex (fp, &eof);
419
420 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
421 continue;
422
423 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 424 {
e9ce014c
KH
425 entries->next = ((struct charset_map_entries *)
426 alloca (sizeof (struct charset_map_entries)));
427 entries = entries->next;
3263d5a2 428 }
e9ce014c
KH
429 idx = n_entries % 0x10000;
430 entries->entry[idx].from = from;
431 entries->entry[idx].to = to;
432 entries->entry[idx].c = c;
433 n_entries++;
3263d5a2
KH
434 }
435 fclose (fp);
436 close (fd);
4ed46869 437
e9ce014c
KH
438 load_charset_map (charset, head, n_entries, control_flag);
439}
440
441static void
442load_charset_map_from_vector (charset, vec, control_flag)
443 struct charset *charset;
444 Lisp_Object vec;
445 int control_flag;
446{
447 unsigned min_code = CHARSET_MIN_CODE (charset);
448 unsigned max_code = CHARSET_MAX_CODE (charset);
449 struct charset_map_entries *head, *entries;
450 int n_entries;
451 int len = ASIZE (vec);
452 int i;
453
454 if (len % 2 == 1)
3263d5a2 455 {
e9ce014c
KH
456 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
457 return;
3263d5a2 458 }
4ed46869 459
e9ce014c
KH
460 head = entries = ((struct charset_map_entries *)
461 alloca (sizeof (struct charset_map_entries)));
462 n_entries = 0;
463 for (i = 0; i < len; i += 2)
464 {
465 Lisp_Object val, val2;
466 unsigned from, to;
467 int c;
468 int idx;
3263d5a2 469
e9ce014c
KH
470 val = AREF (vec, i);
471 if (CONSP (val))
472 {
473 val2 = XCDR (val);
474 val = XCAR (val);
475 CHECK_NATNUM (val);
476 CHECK_NATNUM (val2);
477 from = XFASTINT (val);
478 to = XFASTINT (val2);
479 }
480 else
481 {
482 CHECK_NATNUM (val);
483 from = to = XFASTINT (val);
484 }
485 val = AREF (vec, i + 1);
486 CHECK_NATNUM (val);
487 c = XFASTINT (val);
488
489 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
490 continue;
491
492 if ((n_entries % 0x10000) == 0)
493 {
494 entries->next = ((struct charset_map_entries *)
495 alloca (sizeof (struct charset_map_entries)));
496 entries = entries->next;
497 }
498 idx = n_entries % 0x10000;
499 entries->entry[idx].from = from;
500 entries->entry[idx].to = to;
501 entries->entry[idx].c = c;
502 n_entries++;
503 }
504
505 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
506}
507
3263d5a2
KH
508static void
509load_charset (charset)
510 struct charset *charset;
ac4137cc 511{
3263d5a2
KH
512 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
513 {
514 Lisp_Object map;
ac4137cc 515
3263d5a2
KH
516 map = CHARSET_MAP (charset);
517 if (STRINGP (map))
e9ce014c
KH
518 load_charset_map_from_file (charset, map, 1);
519 else
520 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
521 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
522 }
4ed46869
KH
523}
524
3263d5a2
KH
525
526DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
527 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
528 (object)
529 Lisp_Object object;
23d2a7f1 530{
3263d5a2 531 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
532}
533
35e623fb 534
3263d5a2
KH
535void
536map_charset_chars (c_function, function, charset_symbol, arg)
537 void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object);
538 Lisp_Object function, charset_symbol, arg;
35e623fb 539{
3263d5a2
KH
540 int id;
541 struct charset *charset;
542 Lisp_Object range;
d2665018 543
3263d5a2
KH
544 CHECK_CHARSET_GET_ID (charset_symbol, id);
545 charset = CHARSET_FROM_ID (id);
546
547 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
548 load_charset (charset);
549
550 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
551 {
552 range = Fcons (make_number (CHARSET_MIN_CHAR (charset)),
553 make_number (CHARSET_MAX_CHAR (charset)));
554 if (NILP (function))
555 (*c_function) (arg, range, Qnil);
556 else
557 call2 (function, range, arg);
558 }
559 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
560 {
561 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
562 return;
563 if (CHARSET_ASCII_COMPATIBLE_P (charset))
bbf12bb3 564 {
3263d5a2
KH
565 range = Fcons (make_number (0), make_number (127));
566 if (NILP (function))
567 (*c_function) (arg, range, Qnil);
568 else
569 call2 (function, range, arg);
bbf12bb3 570 }
3263d5a2
KH
571 map_char_table (c_function, function, CHARSET_ENCODER (charset), arg,
572 0, NULL);
573 }
574 else /* i.e. CHARSET_METHOD_PARENT */
575 {
576 int from, to, c;
577 unsigned code;
578 int i, j, k, l;
579 int *code_space = CHARSET_CODE_SPACE (charset);
580 Lisp_Object val;
581
582 range = Fcons (Qnil, Qnil);
583 from = to = -2;
584 for (i = code_space[12]; i <= code_space[13]; i++)
585 for (j = code_space[8]; j <= code_space[9]; j++)
586 for (k = code_space[4]; k <= code_space[5]; k++)
587 for (l = code_space[0]; l <= code_space[1]; l++)
588 {
589 code = (i << 24) | (j << 16) | (k << 8) | l;
590 c = DECODE_CHAR (charset, code);
591 if (c == to + 1)
592 {
593 to++;
594 continue;
595 }
596 if (from >= 0)
597 {
598 if (from < to)
599 {
600 XSETCAR (range, make_number (from));
601 XSETCDR (range, make_number (to));
602 val = range;
603 }
604 else
605 val = make_number (from);
606 if (NILP (function))
607 (*c_function) (arg, val, Qnil);
608 else
609 call2 (function, val, arg);
610 }
611 from = to = (c < 0 ? -2 : c);
612 }
613 if (from >= 0)
bbf12bb3 614 {
3263d5a2
KH
615 if (from < to)
616 {
617 XSETCAR (range, make_number (from));
618 XSETCDR (range, make_number (to));
619 val = range;
620 }
621 else
622 val = make_number (from);
623 if (NILP (function))
624 (*c_function) (arg, val, Qnil);
625 else
626 call2 (function, val, arg);
bbf12bb3 627 }
35e623fb 628 }
3263d5a2
KH
629}
630
631DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0,
04c2f2c5
DL
632 doc: /* Call FUNCTION for all characters in CHARSET.
633FUNCTION is called with an argument RANGE and optional 2nd
3263d5a2 634argument ARG.
6abd9323 635
04c2f2c5
DL
636RANGE is either a cons (FROM . TO), where FROM and TO indicate a range of
637characters contained in CHARSET or a single character in the case that
638FROM and TO would be equal. (The charset mapping may have gaps.)*/)
3263d5a2
KH
639 (function, charset, arg)
640 Lisp_Object function, charset, arg;
641{
642 map_charset_chars (NULL, function, charset, arg);
643 return Qnil;
35e623fb 644}
76d7b829
KH
645
646
3263d5a2
KH
647/* Define a charset according to the arguments. The Nth argument is
648 the Nth attribute of the charset (the last attribute `charset-id'
649 is not included). See the docstring of `define-charset' for the
650 detail. */
76d7b829 651
3263d5a2
KH
652DEFUN ("define-charset-internal", Fdefine_charset_internal,
653 Sdefine_charset_internal, charset_arg_max, MANY, 0,
04c2f2c5
DL
654 doc: /* For internal use only.
655usage: (define-charset-internal ...) */)
3263d5a2
KH
656 (nargs, args)
657 int nargs;
658 Lisp_Object *args;
76d7b829 659{
3263d5a2
KH
660 /* Charset attr vector. */
661 Lisp_Object attrs;
662 Lisp_Object val;
663 unsigned hash_code;
664 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 665 int i, j;
3263d5a2
KH
666 struct charset charset;
667 int id;
668 int dimension;
669 int new_definition_p;
670 int nchars;
671
672 if (nargs != charset_arg_max)
673 return Fsignal (Qwrong_number_of_arguments,
674 Fcons (intern ("define-charset-internal"),
675 make_number (nargs)));
676
677 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
678
679 CHECK_SYMBOL (args[charset_arg_name]);
680 ASET (attrs, charset_name, args[charset_arg_name]);
681
682 val = args[charset_arg_code_space];
683 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 684 {
3263d5a2
KH
685 int min_byte, max_byte;
686
687 min_byte = XINT (Faref (val, make_number (i * 2)));
688 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
689 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
690 error ("Invalid :code-space value");
691 charset.code_space[i * 4] = min_byte;
692 charset.code_space[i * 4 + 1] = max_byte;
693 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
694 nchars *= charset.code_space[i * 4 + 2];
695 charset.code_space[i * 4 + 3] = nchars;
696 if (max_byte > 0)
697 dimension = i + 1;
698 }
76d7b829 699
3263d5a2
KH
700 val = args[charset_arg_dimension];
701 if (NILP (val))
702 charset.dimension = dimension;
703 else
704 {
705 CHECK_NATNUM (val);
706 charset.dimension = XINT (val);
707 if (charset.dimension < 1 || charset.dimension > 4)
708 args_out_of_range_3 (val, make_number (1), make_number (4));
709 }
710
711 charset.code_linear_p
712 = (charset.dimension == 1
713 || (charset.code_space[2] == 256
714 && (charset.dimension == 2
715 || (charset.code_space[6] == 256
716 && (charset.dimension == 3
717 || charset.code_space[10] == 256)))));
718
69f8de5b
KH
719 if (! charset.code_linear_p)
720 {
721 charset.code_space_mask = (unsigned char *) xmalloc (256);
722 bzero (charset.code_space_mask, sizeof (charset.code_space_mask));
723 for (i = 0; i < 4; i++)
724 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
725 j++)
726 charset.code_space_mask[j] |= (1 << i);
727 }
728
3263d5a2
KH
729 charset.iso_chars_96 = charset.code_space[2] == 96;
730
731 charset.min_code = (charset.code_space[0]
732 | (charset.code_space[4] << 8)
733 | (charset.code_space[8] << 16)
734 | (charset.code_space[12] << 24));
735 charset.max_code = (charset.code_space[1]
736 | (charset.code_space[5] << 8)
737 | (charset.code_space[9] << 16)
738 | (charset.code_space[13] << 24));
739
e9ce014c
KH
740 charset.compact_codes_p = charset.max_code < 0x1000000;
741
3263d5a2
KH
742 val = args[charset_arg_invalid_code];
743 if (NILP (val))
744 {
745 if (charset.min_code > 0)
746 charset.invalid_code = 0;
bbf12bb3
KH
747 else
748 {
3263d5a2
KH
749 XSETINT (val, charset.max_code + 1);
750 if (XINT (val) == charset.max_code + 1)
751 charset.invalid_code = charset.max_code + 1;
752 else
753 error ("Attribute :invalid-code must be specified");
76d7b829 754 }
76d7b829 755 }
3263d5a2
KH
756 else
757 {
758 CHECK_NATNUM (val);
759 charset.invalid_code = XFASTINT (val);
760 }
76d7b829 761
3263d5a2
KH
762 val = args[charset_arg_iso_final];
763 if (NILP (val))
764 charset.iso_final = -1;
765 else
766 {
767 CHECK_NUMBER (val);
768 if (XINT (val) < '0' || XINT (val) > 127)
769 error ("Invalid iso-final-char: %d", XINT (val));
770 charset.iso_final = XINT (val);
771 }
772
773 val = args[charset_arg_iso_revision];
774 if (NILP (val))
775 charset.iso_revision = -1;
776 else
4ed46869 777 {
3263d5a2
KH
778 CHECK_NUMBER (val);
779 if (XINT (val) > 63)
780 args_out_of_range (make_number (63), val);
781 charset.iso_revision = XINT (val);
4ed46869 782 }
3263d5a2
KH
783
784 val = args[charset_arg_emacs_mule_id];
785 if (NILP (val))
786 charset.emacs_mule_id = -1;
4ed46869
KH
787 else
788 {
3263d5a2
KH
789 CHECK_NATNUM (val);
790 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
791 error ("Invalid emacs-mule-id: %d", XINT (val));
792 charset.emacs_mule_id = XINT (val);
c83ef371 793 }
6ef23ebb 794
3263d5a2 795 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 796
3263d5a2
KH
797 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
798
799 charset.unified_p = 0;
800
801 bzero (charset.fast_map, sizeof (charset.fast_map));
802
803 if (! NILP (args[charset_arg_code_offset]))
804 {
805 val = args[charset_arg_code_offset];
806 CHECK_NUMBER (val);
807
808 charset.method = CHARSET_METHOD_OFFSET;
809 charset.code_offset = XINT (val);
810
811 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
812 charset.min_char = i + charset.code_offset;
813 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
814 charset.max_char = i + charset.code_offset;
815 if (charset.max_char > MAX_CHAR)
816 error ("Unsupported max char: %d", charset.max_char);
817
818 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
819 i += 128)
820 CHARSET_FAST_MAP_SET (i, charset.fast_map);
821 for (; i <= charset.max_char; i += 0x1000)
822 CHARSET_FAST_MAP_SET (i, charset.fast_map);
823 }
824 else if (! NILP (args[charset_arg_map]))
825 {
826 val = args[charset_arg_map];
827 ASET (attrs, charset_map, val);
828 if (STRINGP (val))
e9ce014c
KH
829 load_charset_map_from_file (&charset, val, 0);
830 else
831 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
832 charset.method = CHARSET_METHOD_MAP_DEFERRED;
833 }
834 else if (! NILP (args[charset_arg_parents]))
835 {
836 val = args[charset_arg_parents];
837 CHECK_LIST (val);
838 charset.method = CHARSET_METHOD_INHERIT;
839 val = Fcopy_sequence (val);
840 ASET (attrs, charset_parents, val);
841
842 charset.min_char = MAX_CHAR;
843 charset.max_char = 0;
844 for (; ! NILP (val); val = Fcdr (val))
4ed46869 845 {
3263d5a2
KH
846 Lisp_Object elt, car_part, cdr_part;
847 int this_id, offset;
848 struct charset *this_charset;
849
850 elt = Fcar (val);
851 if (CONSP (elt))
852 {
853 car_part = XCAR (elt);
854 cdr_part = XCDR (elt);
855 CHECK_CHARSET_GET_ID (car_part, this_id);
856 CHECK_NUMBER (cdr_part);
857 offset = XINT (cdr_part);
858 }
859 else
4ed46869 860 {
3263d5a2
KH
861 CHECK_CHARSET_GET_ID (elt, this_id);
862 offset = 0;
4ed46869 863 }
3263d5a2
KH
864 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
865
866 this_charset = CHARSET_FROM_ID (this_id);
867 if (charset.min_char > this_charset->min_char)
868 charset.min_char = this_charset->min_char;
869 if (charset.max_char < this_charset->max_char)
870 charset.max_char = this_charset->max_char;
871 for (i = 0; i < 190; i++)
872 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 873 }
4ed46869 874 }
3263d5a2
KH
875 else
876 error ("None of :code-offset, :map, :parents are specified");
4ed46869 877
3263d5a2
KH
878 val = args[charset_arg_unify_map];
879 if (! NILP (val) && !STRINGP (val))
880 CHECK_VECTOR (val);
881 ASET (attrs, charset_unify_map, val);
4ed46869 882
3263d5a2
KH
883 CHECK_LIST (args[charset_arg_plist]);
884 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 885
3263d5a2
KH
886 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
887 &hash_code);
888 if (charset.hash_index >= 0)
889 {
890 new_definition_p = 0;
4f65af01 891 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
3263d5a2
KH
892 HASH_VALUE (hash_table, charset.hash_index) = attrs;
893 }
1a45ff10 894 else
3263d5a2
KH
895 {
896 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
897 hash_code);
898 if (charset_table_used == charset_table_size)
899 {
900 charset_table_size += 256;
901 charset_table
902 = ((struct charset *)
903 xrealloc (charset_table,
904 sizeof (struct charset) * charset_table_size));
905 }
906 id = charset_table_used++;
3263d5a2
KH
907 new_definition_p = 1;
908 }
4ed46869 909
4f65af01 910 ASET (attrs, charset_id, make_number (id));
3263d5a2
KH
911 charset.id = id;
912 charset_table[id] = charset;
913
914 if (charset.iso_final >= 0)
4ed46869 915 {
3263d5a2
KH
916 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
917 charset.iso_final) = id;
918 if (new_definition_p)
919 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
920 Fcons (make_number (id), Qnil));
4ed46869 921 }
3263d5a2
KH
922
923 if (charset.emacs_mule_id >= 0)
4ed46869 924 {
3263d5a2 925 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
4f65af01
KH
926 if (charset.emacs_mule_id < 0xA0)
927 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
3263d5a2
KH
928 if (new_definition_p)
929 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
930 Fcons (make_number (id), Qnil));
4ed46869
KH
931 }
932
3263d5a2
KH
933 if (new_definition_p)
934 {
935 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
936 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
937 Fcons (make_number (id), Qnil));
938 }
4ed46869 939
3263d5a2 940 return Qnil;
4ed46869
KH
941}
942
3263d5a2
KH
943
944DEFUN ("define-charset-alias", Fdefine_charset_alias,
945 Sdefine_charset_alias, 2, 2, 0,
946 doc: /* Define ALIAS as an alias for charset CHARSET. */)
947 (alias, charset)
948 Lisp_Object alias, charset;
4ed46869 949{
3263d5a2
KH
950 Lisp_Object attr;
951
952 CHECK_CHARSET_GET_ATTR (charset, attr);
953 Fputhash (alias, attr, Vcharset_hash_table);
954 return Qnil;
955}
4ed46869 956
4ed46869 957
3263d5a2
KH
958DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
959 doc: /* Return the primary charset. */)
960 ()
961{
962 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
963}
4ed46869 964
4ed46869 965
3263d5a2
KH
966DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
967 1, 1, 0,
968 doc: /* Set the primary charset to CHARSET. */)
969 (charset)
970 Lisp_Object charset;
971{
972 int id;
973
974 CHECK_CHARSET_GET_ID (charset, id);
975 charset_primary = id;
4ed46869
KH
976 return Qnil;
977}
978
3263d5a2
KH
979
980DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
981 doc: /* Return a property list of CHARSET. */)
982 (charset)
983 Lisp_Object charset;
984{
985 Lisp_Object attrs;
986
987 CHECK_CHARSET_GET_ATTR (charset, attrs);
988 return CHARSET_ATTR_PLIST (attrs);
989}
990
991
992DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
993 doc: /* Set CHARSET's property list to PLIST. */)
994 (charset, plist)
995 Lisp_Object charset, plist;
996{
997 Lisp_Object attrs;
998
999 CHECK_CHARSET_GET_ATTR (charset, attrs);
1000 CHARSET_ATTR_PLIST (attrs) = plist;
1001 return plist;
1002}
1003
1004
1005DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
1006 doc: /* Unify characters of CHARSET with Unicode. */)
1007 (charset, unify_map)
1008 Lisp_Object charset, unify_map;
8a73a704 1009{
3263d5a2
KH
1010 int id;
1011 struct charset *cs;
1012
1013 CHECK_CHARSET_GET_ID (charset, id);
1014 cs = CHARSET_FROM_ID (id);
1015 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1016 load_charset (cs);
1017 if (CHARSET_UNIFIED_P (cs)
1018 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs)))
1019 return Qnil;
1020 CHARSET_UNIFIED_P (cs) = 0;
1021 if (NILP (unify_map))
1022 unify_map = CHARSET_UNIFY_MAP (cs);
1023 if (STRINGP (unify_map))
e9ce014c
KH
1024 load_charset_map_from_file (cs, unify_map, 2);
1025 else
1026 load_charset_map_from_vector (cs, unify_map, 2);
3263d5a2
KH
1027 CHARSET_UNIFIED_P (cs) = 1;
1028 return Qnil;
8a73a704
KH
1029}
1030
3fac5a51
KH
1031DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1032 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2
KH
1033 doc: /*
1034Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1035DIMENSION is the number of bytes to represent a character: 1 or 2.
1036CHARS is the number of characters in a dimension: 94 or 96.
1037
1038This final char is for private use, thus the range is `0' (48) .. `?' (63).
3263d5a2 1039If there's no unused final char for the attrified kind of charset,
fdb82f93
PJ
1040return nil. */)
1041 (dimension, chars)
3fac5a51
KH
1042 Lisp_Object dimension, chars;
1043{
1044 int final_char;
1045
b7826503
PJ
1046 CHECK_NUMBER (dimension);
1047 CHECK_NUMBER (chars);
3263d5a2
KH
1048 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1049 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1050 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1051 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1052 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1053 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1054 break;
3fac5a51
KH
1055 return (final_char <= '?' ? make_number (final_char) : Qnil);
1056}
1057
3263d5a2
KH
1058static void
1059check_iso_charset_parameter (dimension, chars, final_char)
1060 Lisp_Object dimension, chars, final_char;
4ed46869 1061{
3263d5a2
KH
1062 CHECK_NATNUM (dimension);
1063 CHECK_NATNUM (chars);
1064 CHECK_NATNUM (final_char);
4ed46869 1065
3263d5a2
KH
1066 if (XINT (dimension) > 3)
1067 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1068 if (XINT (chars) != 94 && XINT (chars) != 96)
1069 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1070 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1071 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1072}
1073
1074
1075DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1076 4, 4, 0,
1077 doc: /*
1078Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
1079CHARSET should be defined by `defined-charset' in advance. */)
1080 (dimension, chars, final_char, charset)
1081 Lisp_Object dimension, chars, final_char, charset;
1082{
1083 int id;
4ed46869 1084
3263d5a2
KH
1085 CHECK_CHARSET_GET_ID (charset, id);
1086 check_iso_charset_parameter (dimension, chars, final_char);
1087
1088 ISO_CHARSET_TABLE (dimension, chars, final_char) = id;
4ed46869
KH
1089 return Qnil;
1090}
1091
3263d5a2 1092
2e344af3
KH
1093/* Return information about charsets in the text at PTR of NBYTES
1094 bytes, which are NCHARS characters. The value is:
f6302ac9 1095
cfe34140 1096 0: Each character is represented by one byte. This is always
3263d5a2
KH
1097 true for a unibyte string. For a multibyte string, true if
1098 it contains only ASCII characters.
1099
1100 1: No charsets other than ascii, eight-bit-control, and
1101 latin-1 are found.
1d67c29b 1102
3263d5a2
KH
1103 2: Otherwise.
1104*/
4ed46869
KH
1105
1106int
3263d5a2
KH
1107string_xstring_p (string)
1108 Lisp_Object string;
4ed46869 1109{
3263d5a2
KH
1110 unsigned char *p = XSTRING (string)->data;
1111 unsigned char *endp = p + STRING_BYTES (XSTRING (string));
1112 struct charset *charset;
1113
1114 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1115 return 0;
1116
1117 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1118 while (p < endp)
0282eb69 1119 {
3263d5a2 1120 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1121
3263d5a2
KH
1122 if (ENCODE_CHAR (charset, c) < 0)
1123 return 2;
0282eb69 1124 }
3263d5a2
KH
1125 return 1;
1126}
05505664 1127
05505664 1128
3263d5a2 1129/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1130
3263d5a2
KH
1131 CHARSETS is a vector. Each element is a cons of CHARSET and
1132 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1133 FOUND-FLAG t (or nil) means that the corresponding charset is
1134 already found (or not yet found).
2e344af3 1135
3263d5a2 1136 It may lookup a translation table TABLE if supplied. */
2e344af3 1137
3263d5a2
KH
1138static void
1139find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
1140 unsigned char *ptr;
1141 int nchars, nbytes;
1142 Lisp_Object charsets, table;
1143{
1144 unsigned char *pend = ptr + nbytes;
1145 int ncharsets = ASIZE (charsets);
1146
1147 if (nchars == nbytes)
1148 return;
1149
1150 while (ptr < pend)
1151 {
1152 int c = STRING_CHAR_ADVANCE (ptr);
1153 int i;
1154 int all_found = 1;
1155 Lisp_Object elt;
1156
1157 if (!NILP (table))
1158 c = translate_char (table, c);
1159 for (i = 0; i < ncharsets; i++)
1160 {
1161 elt = AREF (charsets, i);
1162 if (NILP (XCDR (elt)))
1163 {
1164 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1165
1166 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1167 XCDR (elt) = Qt;
1168 else
1169 all_found = 0;
1170 }
4ed46869 1171 }
3263d5a2
KH
1172 if (all_found)
1173 break;
4ed46869 1174 }
4ed46869
KH
1175}
1176
3263d5a2 1177
4ed46869 1178DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1179 2, 3, 0,
fdb82f93
PJ
1180 doc: /* Return a list of charsets in the region between BEG and END.
1181BEG and END are buffer positions.
1182Optional arg TABLE if non-nil is a translation table to look up.
1183
1184If the region contains invalid multibyte characters,
1185`unknown' is included in the returned list.
1186
1187If the current buffer is unibyte, the returned list may contain
1188only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1189 (beg, end, table)
23d2a7f1 1190 Lisp_Object beg, end, table;
4ed46869 1191{
3263d5a2 1192 Lisp_Object charsets;
6ae1f27e 1193 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1194 Lisp_Object val;
1195
1196 validate_region (&beg, &end);
1197 from = XFASTINT (beg);
1198 stop = to = XFASTINT (end);
6ae1f27e 1199
4ed46869 1200 if (from < GPT && GPT < to)
6ae1f27e
RS
1201 {
1202 stop = GPT;
1203 stop_byte = GPT_BYTE;
1204 }
1205 else
1206 stop_byte = CHAR_TO_BYTE (stop);
1207
1208 from_byte = CHAR_TO_BYTE (from);
1209
3263d5a2
KH
1210 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1211 for (i = 0; i < charset_table_used; i++)
1212 ASET (charsets, i, Fcons (make_number (i), Qnil));
1213
4ed46869
KH
1214 while (1)
1215 {
3263d5a2
KH
1216 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1217 stop_byte - from_byte, charsets, table);
4ed46869 1218 if (stop < to)
6ae1f27e
RS
1219 {
1220 from = stop, from_byte = stop_byte;
1221 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1222 }
4ed46869
KH
1223 else
1224 break;
1225 }
6ae1f27e 1226
4ed46869 1227 val = Qnil;
3263d5a2
KH
1228 for (i = charset_table_used - 1; i >= 0; i--)
1229 if (!NILP (XCDR (AREF (charsets, i))))
1230 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1231 return val;
1232}
1233
1234DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1235 1, 2, 0,
fdb82f93
PJ
1236 doc: /* Return a list of charsets in STR.
1237Optional arg TABLE if non-nil is a translation table to look up.
1238
1239If the string contains invalid multibyte characters,
1240`unknown' is included in the returned list.
1241
1242If STR is unibyte, the returned list may contain
3263d5a2 1243only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1244 (str, table)
23d2a7f1 1245 Lisp_Object str, table;
4ed46869 1246{
3263d5a2 1247 Lisp_Object charsets;
4ed46869
KH
1248 int i;
1249 Lisp_Object val;
1250
b7826503 1251 CHECK_STRING (str);
87b089ad 1252
3263d5a2
KH
1253 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1254 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1255 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1256
4ed46869 1257 val = Qnil;
3263d5a2
KH
1258 for (i = charset_table_used - 1; i >= 0; i--)
1259 if (!NILP (XCDR (AREF (charsets, i))))
1260 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1261 return val;
1262}
2e344af3 1263
4ed46869 1264\f
3263d5a2
KH
1265
1266/* Return a character correponding to the code-point CODE of
1267 CHARSET. */
1268
1269int
1270decode_char (charset, code)
1271 struct charset *charset;
1272 unsigned code;
4ed46869 1273{
3263d5a2
KH
1274 int c, char_index;
1275 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1276
3263d5a2
KH
1277 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1278 return -1;
4ed46869 1279
3263d5a2 1280 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1281 {
3263d5a2
KH
1282 load_charset (charset);
1283 method = CHARSET_METHOD (charset);
ac4137cc 1284 }
4ed46869 1285
3263d5a2 1286 if (method == CHARSET_METHOD_INHERIT)
2e344af3 1287 {
3263d5a2 1288 Lisp_Object parents;
4ed46869 1289
3263d5a2
KH
1290 parents = CHARSET_PARENTS (charset);
1291 c = -1;
1292 for (; CONSP (parents); parents = XCDR (parents))
1293 {
1294 int id = XINT (XCAR (XCAR (parents)));
1295 int code_offset = XINT (XCDR (XCAR (parents)));
1296 unsigned this_code = code + code_offset;
9d3d8cba 1297
3263d5a2
KH
1298 charset = CHARSET_FROM_ID (id);
1299 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1300 break;
1301 }
1302 }
1303 else
ac4137cc 1304 {
3263d5a2 1305 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1306 if (char_index < 0)
1307 return -1;
3263d5a2
KH
1308
1309 if (method == CHARSET_METHOD_MAP)
ac4137cc 1310 {
3263d5a2
KH
1311 Lisp_Object decoder;
1312
1313 decoder = CHARSET_DECODER (charset);
1314 if (! VECTORP (decoder))
1315 return -1;
1316 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1317 }
1318 else
1319 {
3263d5a2 1320 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1321 }
1322 }
9d3d8cba 1323
3263d5a2
KH
1324 if (CHARSET_UNIFIED_P (charset)
1325 && c >= 0)
c449997d
KH
1326 {
1327 MAYBE_UNIFY_CHAR (c);
1328 }
d2665018 1329
3263d5a2 1330 return c;
d2665018
KH
1331}
1332
1bcc1567 1333
3263d5a2
KH
1334/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1335 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1bcc1567 1336
3263d5a2
KH
1337unsigned
1338encode_char (charset, c)
1339 struct charset *charset;
9b6a601f
KH
1340 int c;
1341{
3263d5a2
KH
1342 unsigned code;
1343 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1344
3263d5a2 1345 if (CHARSET_UNIFIED_P (charset))
4ed46869 1346 {
3263d5a2
KH
1347 Lisp_Object deunifier;
1348 int deunified;
4ed46869 1349
3263d5a2
KH
1350 deunifier = CHARSET_DEUNIFIER (charset);
1351 if (! CHAR_TABLE_P (deunifier))
1352 {
1353 Funify_charset (CHARSET_NAME (charset), Qnil);
1354 deunifier = CHARSET_DEUNIFIER (charset);
1355 }
1356 deunified = XINT (CHAR_TABLE_REF (deunifier, c));
1357 if (deunified > 0)
1358 c = deunified;
4ed46869 1359 }
beeedaad 1360
3263d5a2
KH
1361 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1362 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1363 return CHARSET_INVALID_CODE (charset);
beeedaad 1364
3263d5a2 1365 if (method == CHARSET_METHOD_INHERIT)
859f2b3c 1366 {
3263d5a2 1367 Lisp_Object parents;
859f2b3c 1368
3263d5a2
KH
1369 parents = CHARSET_PARENTS (charset);
1370 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1371 {
3263d5a2
KH
1372 int id = XINT (XCAR (XCAR (parents)));
1373 int code_offset = XINT (XCDR (XCAR (parents)));
1374 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1375
3263d5a2
KH
1376 code = ENCODE_CHAR (this_charset, c);
1377 if (code != CHARSET_INVALID_CODE (this_charset)
1378 && (code_offset < 0 || code >= code_offset))
1379 {
1380 code -= code_offset;
69f8de5b
KH
1381 if (code >= charset->min_code && code <= charset->max_code
1382 && CODE_POINT_TO_INDEX (charset, code) >= 0)
3263d5a2
KH
1383 return code;
1384 }
beeedaad 1385 }
3263d5a2
KH
1386 return CHARSET_INVALID_CODE (charset);
1387 }
99529c2c 1388
3263d5a2 1389 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1390 {
3263d5a2
KH
1391 load_charset (charset);
1392 method = CHARSET_METHOD (charset);
859f2b3c 1393 }
beeedaad 1394
3263d5a2 1395 if (method == CHARSET_METHOD_MAP)
3f62427c 1396 {
3263d5a2 1397 Lisp_Object encoder;
beeedaad 1398 Lisp_Object val;
beeedaad 1399
3263d5a2
KH
1400 encoder = CHARSET_ENCODER (charset);
1401 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1402 return CHARSET_INVALID_CODE (charset);
1403 val = CHAR_TABLE_REF (encoder, c);
e9ce014c
KH
1404 code = XINT (val);
1405 if (! CHARSET_COMPACT_CODES_P (charset))
1406 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2
KH
1407 }
1408 else
beeedaad 1409 {
3263d5a2
KH
1410 code = c - CHARSET_CODE_OFFSET (charset);
1411 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1412 }
beeedaad 1413
3263d5a2 1414 return code;
3f62427c
KH
1415}
1416
4ed46869 1417
3263d5a2
KH
1418DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1419 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1420Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1421
3263d5a2
KH
1422CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1423
1424Optional argument RESTRICTION specifies a way to map the pair of CCS
1425and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1426 (charset, code_point, restriction)
1427 Lisp_Object charset, code_point, restriction;
4ed46869 1428{
3263d5a2
KH
1429 int c, id;
1430 unsigned code;
1431 struct charset *charsetp;
4ed46869 1432
3263d5a2
KH
1433 CHECK_CHARSET_GET_ID (charset, id);
1434 if (CONSP (code_point))
1435 {
1436 CHECK_NATNUM (XCAR (code_point));
1437 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1438 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1439 }
1440 else
1441 {
1442 CHECK_NATNUM (code_point);
1443 code = XINT (code_point);
1444 }
1445 charsetp = CHARSET_FROM_ID (id);
1446 c = DECODE_CHAR (charsetp, code);
1447 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1448}
1449
046b1f03 1450
3263d5a2
KH
1451DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1452 doc: /* Encode the character CH into a code-point of CHARSET.
1453Return nil if CHARSET doesn't include CH.
17e7ef1b 1454
3263d5a2
KH
1455Optional argument RESTRICTION specifies a way to map CHAR to a
1456code-point in CCS. Currently not supported and just ignored. */)
1457 (ch, charset, restriction)
1458 Lisp_Object ch, charset, restriction;
1459{
1460 int c, id;
1461 unsigned code;
1462 struct charset *charsetp;
046b1f03 1463
3263d5a2
KH
1464 CHECK_CHARSET_GET_ID (charset, id);
1465 CHECK_NATNUM (ch);
1466 c = XINT (ch);
1467 charsetp = CHARSET_FROM_ID (id);
1468 code = ENCODE_CHAR (charsetp, ch);
1469 if (code == CHARSET_INVALID_CODE (charsetp))
1470 return Qnil;
1471 if (code > 0x7FFFFFF)
1472 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1473 return make_number (code);
6ae1f27e 1474}
9036eb45 1475
87b089ad 1476
b121a744
KH
1477DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1478 doc:
1479 /* Return a character of CHARSET whose position codes are CODEn.
1480
1481CODE1 through CODE4 are optional, but if you don't supply sufficient
1482position codes, it is assumed that the minimum code in each dimension
04c2f2c5 1483is specified. */)
b121a744
KH
1484 (charset, code1, code2, code3, code4)
1485 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1486{
3263d5a2
KH
1487 int id, dimension;
1488 struct charset *charsetp;
b121a744
KH
1489 unsigned code;
1490 int c;
87b089ad 1491
3263d5a2
KH
1492 CHECK_CHARSET_GET_ID (charset, id);
1493 charsetp = CHARSET_FROM_ID (id);
87b089ad 1494
b121a744
KH
1495 dimension = CHARSET_DIMENSION (charsetp);
1496 if (NILP (code1))
d47073ca
KH
1497 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1498 ? 0 : CHARSET_MIN_CODE (charsetp));
3263d5a2 1499 else
87b089ad 1500 {
b121a744
KH
1501 CHECK_NATNUM (code1);
1502 if (XFASTINT (code1) >= 0x100)
1503 args_out_of_range (make_number (0xFF), code1);
1504 code = XFASTINT (code1);
2e344af3 1505
b0a1e45e 1506 if (dimension > 1)
b121a744
KH
1507 {
1508 code <<= 8;
b0a1e45e
KH
1509 if (NILP (code2))
1510 code |= charsetp->code_space[(dimension - 2) * 4];
b121a744
KH
1511 else
1512 {
b0a1e45e
KH
1513 CHECK_NATNUM (code2);
1514 if (XFASTINT (code2) >= 0x100)
1515 args_out_of_range (make_number (0xFF), code2);
1516 code |= XFASTINT (code2);
b121a744
KH
1517 }
1518
b0a1e45e 1519 if (dimension > 2)
b121a744
KH
1520 {
1521 code <<= 8;
b0a1e45e
KH
1522 if (NILP (code3))
1523 code |= charsetp->code_space[(dimension - 3) * 4];
b121a744
KH
1524 else
1525 {
b0a1e45e
KH
1526 CHECK_NATNUM (code3);
1527 if (XFASTINT (code3) >= 0x100)
1528 args_out_of_range (make_number (0xFF), code3);
1529 code |= XFASTINT (code3);
1530 }
1531
1532 if (dimension > 3)
1533 {
1534 code <<= 8;
1535 if (NILP (code4))
1536 code |= charsetp->code_space[0];
1537 else
1538 {
1539 CHECK_NATNUM (code4);
1540 if (XFASTINT (code4) >= 0x100)
1541 args_out_of_range (make_number (0xFF), code4);
1542 code |= XFASTINT (code4);
1543 }
b121a744
KH
1544 }
1545 }
1546 }
1547 }
3263d5a2 1548
b121a744
KH
1549 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1550 code &= 0x7F7F7F7F;
1551 c = DECODE_CHAR (charsetp, code);
1552 if (c < 0)
1553 error ("Invalid code(s)");
3263d5a2 1554 return make_number (c);
2e344af3
KH
1555}
1556
3263d5a2
KH
1557
1558/* Return the first charset in CHARSET_LIST that contains C.
1559 CHARSET_LIST is a list of charset IDs. If it is nil, use
1560 Vcharset_ordered_list. */
1561
1562struct charset *
1563char_charset (c, charset_list, code_return)
1564 int c;
1565 Lisp_Object charset_list;
1566 unsigned *code_return;
2e344af3 1567{
3263d5a2
KH
1568 if (NILP (charset_list))
1569 charset_list = Vcharset_ordered_list;
2e344af3 1570
3263d5a2 1571 while (CONSP (charset_list))
2e344af3 1572 {
3263d5a2
KH
1573 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1574 unsigned code = ENCODE_CHAR (charset, c);
1575
1576 if (code != CHARSET_INVALID_CODE (charset))
1577 {
1578 if (code_return)
1579 *code_return = code;
1580 return charset;
1581 }
1582 charset_list = XCDR (charset_list);
2e344af3 1583 }
3263d5a2 1584 return NULL;
2e344af3
KH
1585}
1586
2e344af3 1587
3263d5a2 1588DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
04c2f2c5 1589 doc: /*Return list of charset and one to three position-codes of CHAR.
3263d5a2
KH
1590If CHAR is invalid as a character code,
1591return a list of symbol `unknown' and CHAR. */)
1592 (ch)
1593 Lisp_Object ch;
2e344af3 1594{
3263d5a2
KH
1595 struct charset *charset;
1596 int c, dimension;
1597 unsigned code;
1598 Lisp_Object val;
1599
1600 CHECK_CHARACTER (ch);
1601 c = XFASTINT (ch);
1602 charset = CHAR_CHARSET (c);
1603 if (! charset)
1604 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1605
1606 code = ENCODE_CHAR (charset, c);
1607 if (code == CHARSET_INVALID_CODE (charset))
1608 abort ();
1609 dimension = CHARSET_DIMENSION (charset);
1610 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1611 : dimension == 2 ? Fcons (make_number (code >> 8),
1612 Fcons (make_number (code & 0xFF), Qnil))
1613 : Fcons (make_number (code >> 16),
1614 Fcons (make_number ((code >> 8) & 0xFF),
1615 Fcons (make_number (code & 0xFF), Qnil))));
1616 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1617}
87b089ad 1618
740f080d 1619
3263d5a2
KH
1620DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1621 doc: /* Return the charset of highest priority that contains CHAR. */)
1622 (ch)
1623 Lisp_Object ch;
740f080d 1624{
3263d5a2 1625 struct charset *charset;
740f080d 1626
3263d5a2
KH
1627 CHECK_CHARACTER (ch);
1628 charset = CHAR_CHARSET (XINT (ch));
1629 return (CHARSET_NAME (charset));
740f080d
KH
1630}
1631
2e344af3 1632
3263d5a2
KH
1633DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1634 doc: /*
1635Return charset of a character in the current buffer at position POS.
1636If POS is nil, it defauls to the current point.
1637If POS is out of range, the value is nil. */)
1638 (pos)
1639 Lisp_Object pos;
2e344af3 1640{
3263d5a2
KH
1641 Lisp_Object ch;
1642 struct charset *charset;
1643
1644 ch = Fchar_after (pos);
1645 if (! INTEGERP (ch))
1646 return ch;
1647 charset = CHAR_CHARSET (XINT (ch));
1648 return (CHARSET_NAME (charset));
87b089ad
RS
1649}
1650
2e344af3 1651
3263d5a2
KH
1652DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1653 doc: /*
1654Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1655
1656ISO 2022's designation sequence (escape sequence) distinguishes charsets
1657by their DIMENSION, CHARS, and FINAL-CHAR,
1658where as Emacs distinguishes them by charset symbol.
1659See the documentation of the function `charset-info' for the meanings of
1660DIMENSION, CHARS, and FINAL-CHAR. */)
1661 (dimension, chars, final_char)
1662 Lisp_Object dimension, chars, final_char;
2e344af3 1663{
3263d5a2 1664 int id;
2e344af3 1665
3263d5a2
KH
1666 check_iso_charset_parameter (dimension, chars, final_char);
1667 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1668 XFASTINT (final_char));
1669 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1670}
1671
3263d5a2
KH
1672
1673DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1674 0, 0, 0,
1675 doc: /*
1676Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1677 ()
4ed46869 1678{
53316e55 1679 int i;
3263d5a2
KH
1680 struct charset *charset;
1681 Lisp_Object attrs;
4ed46869 1682
3263d5a2 1683 for (i = 0; i < charset_table_used; i++)
4ed46869 1684 {
3263d5a2
KH
1685 charset = CHARSET_FROM_ID (i);
1686 attrs = CHARSET_ATTRIBUTES (charset);
1687
1688 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1689 {
1690 CHARSET_ATTR_DECODER (attrs) = Qnil;
1691 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1692 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1693 }
1694
1695 if (CHARSET_UNIFIED_P (charset))
1696 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1697 }
1698
3263d5a2 1699 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1700 {
3263d5a2
KH
1701 Foptimize_char_table (Vchar_unified_charset_table);
1702 Vchar_unify_table = Vchar_unified_charset_table;
1703 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1704 }
1705
3263d5a2 1706 return Qnil;
4ed46869
KH
1707}
1708
4ed46869 1709\f
3263d5a2
KH
/* Initialization hook for this file.  It currently does nothing.  */

void
init_charset ()
{
}
1715
4ed46869 1716
dfcf069d 1717void
4ed46869
KH
1718init_charset_once ()
1719{
1720 int i, j, k;
1721
3263d5a2
KH
1722 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1723 for (j = 0; j < ISO_MAX_CHARS; j++)
1724 for (k = 0; k < ISO_MAX_FINAL; k++)
1725 iso_charset_table[i][j][k] = -1;
1726
1727 for (i = 0; i < 255; i++)
1728 emacs_mule_charset[i] = NULL;
4ed46869 1729
3263d5a2
KH
1730#if 0
1731 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1732 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1733
1734 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1735
1736 /* Intern this now in case it isn't already done.
1737 Setting this variable twice is harmless.
1738 But don't staticpro it here--that is done in alloc.c. */
1739 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1740
3263d5a2
KH
1741 /* Now we are ready to set up this property, so we can create syntax
1742 tables. */
1743 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1744#endif
4ed46869
KH
1745}
1746
1747#ifdef emacs
1748
dfcf069d 1749void
4ed46869
KH
1750syms_of_charset ()
1751{
3263d5a2
KH
1752 char *p;
1753
1754 DEFSYM (Qcharsetp, "charsetp");
1755
1756 DEFSYM (Qascii, "ascii");
1757 DEFSYM (Qunicode, "unicode");
1758 DEFSYM (Qeight_bit_control, "eight-bit-control");
1759 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1760 DEFSYM (Qiso_8859_1, "iso-8859-1");
1761
1762 DEFSYM (Qgl, "gl");
1763 DEFSYM (Qgr, "gr");
1764
1765 p = (char *) xmalloc (30000);
1766
1767 staticpro (&Vcharset_ordered_list);
1768 Vcharset_ordered_list = Qnil;
1769
1770 staticpro (&Viso_2022_charset_list);
1771 Viso_2022_charset_list = Qnil;
1772
1773 staticpro (&Vemacs_mule_charset_list);
1774 Vemacs_mule_charset_list = Qnil;
1775
1776 staticpro (&Vcharset_hash_table);
1777 Vcharset_hash_table = Fmakehash (Qeq);
1778
1779 charset_table_size = 128;
1780 charset_table = ((struct charset *)
1781 xmalloc (sizeof (struct charset) * charset_table_size));
1782 charset_table_used = 0;
1783
1784 staticpro (&Vchar_unified_charset_table);
1785 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
1786
1787 defsubr (&Scharsetp);
1788 defsubr (&Smap_charset_chars);
1789 defsubr (&Sdefine_charset_internal);
1790 defsubr (&Sdefine_charset_alias);
1791 defsubr (&Sprimary_charset);
1792 defsubr (&Sset_primary_charset);
1793 defsubr (&Scharset_plist);
1794 defsubr (&Sset_charset_plist);
1795 defsubr (&Sunify_charset);
3fac5a51 1796 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1797 defsubr (&Sdeclare_equiv_charset);
1798 defsubr (&Sfind_charset_region);
1799 defsubr (&Sfind_charset_string);
3263d5a2
KH
1800 defsubr (&Sdecode_char);
1801 defsubr (&Sencode_char);
4ed46869 1802 defsubr (&Ssplit_char);
3263d5a2 1803 defsubr (&Smake_char);
4ed46869 1804 defsubr (&Schar_charset);
90d7b74e 1805 defsubr (&Scharset_after);
4ed46869 1806 defsubr (&Siso_charset);
3263d5a2
KH
1807 defsubr (&Sclear_charset_maps);
1808
1809 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
1810 doc: /* Directory of charset map files that come with GNU Emacs.
04c2f2c5 1811The default value is sub-directory "charsets" of `data-directory'. */);
3263d5a2
KH
1812 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
1813 Vdata_directory);
4ed46869
KH
1814
1815 DEFVAR_LISP ("charset-list", &Vcharset_list,
fdb82f93 1816 doc: /* List of charsets ever defined. */);
3263d5a2
KH
1817 Vcharset_list = Qnil;
1818
1819 /* Make the prerequisite charset `ascii' and `unicode'. */
1820 {
1821 Lisp_Object args[charset_arg_max];
1822 Lisp_Object plist[14];
1823 Lisp_Object val;
1824
1825 plist[0] = intern (":name");
1826 plist[1] = args[charset_arg_name] = Qascii;
1827 plist[2] = intern (":dimension");
1828 plist[3] = args[charset_arg_dimension] = make_number (1);
1829 val = Fmake_vector (make_number (8), make_number (0));
1830 ASET (val, 1, make_number (127));
1831 plist[4] = intern (":code-space");
1832 plist[5] = args[charset_arg_code_space] = val;
1833 plist[6] = intern (":iso-final-char");
1834 plist[7] = args[charset_arg_iso_final] = make_number ('B');
1835 args[charset_arg_iso_revision] = Qnil;
1836 plist[8] = intern (":emacs-mule-id");
1837 plist[9] = args[charset_arg_emacs_mule_id] = make_number (0);
1838 plist[10] = intern (":ascii-compatible-p");
1839 plist[11] = args[charset_arg_ascii_compatible_p] = Qt;
1840 args[charset_arg_supplementary_p] = Qnil;
1841 args[charset_arg_invalid_code] = Qnil;
1842 plist[12] = intern (":code-offset");
1843 plist[13] = args[charset_arg_code_offset] = make_number (0);
1844 args[charset_arg_map] = Qnil;
1845 args[charset_arg_parents] = Qnil;
1846 args[charset_arg_unify_map] = Qnil;
1847 /* The actual plist is set by mule-conf.el. */
1848 args[charset_arg_plist] = Flist (14, plist);
1849 Fdefine_charset_internal (charset_arg_max, args);
1850 charset_ascii = CHARSET_SYMBOL_ID (Qascii);
1851
1852 plist[1] = args[charset_arg_name] = Qunicode;
1853 plist[3] = args[charset_arg_dimension] = make_number (3);
1854 val = Fmake_vector (make_number (8), make_number (0));
1855 ASET (val, 1, make_number (255));
1856 ASET (val, 3, make_number (255));
1857 ASET (val, 5, make_number (16));
1858 plist[5] = args[charset_arg_code_space] = val;
1859 plist[7] = args[charset_arg_iso_final] = Qnil;
1860 args[charset_arg_iso_revision] = Qnil;
1861 plist[9] = args[charset_arg_emacs_mule_id] = Qnil;
1862 plist[11] = args[charset_arg_ascii_compatible_p] = Qt;
1863 args[charset_arg_supplementary_p] = Qnil;
1864 args[charset_arg_invalid_code] = Qnil;
1865 plist[13] = args[charset_arg_code_offset] = make_number (0);
1866 args[charset_arg_map] = Qnil;
1867 args[charset_arg_parents] = Qnil;
1868 args[charset_arg_unify_map] = Qnil;
1869 /* The actual plist is set by mule-conf.el. */
1870 args[charset_arg_plist] = Flist (14, plist);
1871 Fdefine_charset_internal (charset_arg_max, args);
1872 charset_unicode = CHARSET_SYMBOL_ID (Qunicode);
1873 }
4ed46869
KH
1874}
1875
1876#endif /* emacs */