(Fmap_charset_chars): Fix docstring.
[bpt/emacs.git] / src / charset.c
CommitLineData
3263d5a2
KH
1/* Basic character set support.
2 Copyright (C) 1995, 97, 98, 2000, 2001 Electrotechnical Laboratory, JAPAN.
75c8c592 3 Licensed to the Free Software Foundation.
12bcae05 4 Copyright (C) 2001 Free Software Foundation, Inc.
3263d5a2
KH
5 Copyright (C) 2001, 2002
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H13PRO009
4ed46869 8
369314dc
KH
9This file is part of GNU Emacs.
10
11GNU Emacs is free software; you can redistribute it and/or modify
12it under the terms of the GNU General Public License as published by
13the Free Software Foundation; either version 2, or (at your option)
14any later version.
4ed46869 15
369314dc
KH
16GNU Emacs is distributed in the hope that it will be useful,
17but WITHOUT ANY WARRANTY; without even the implied warranty of
18MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19GNU General Public License for more details.
4ed46869 20
369314dc
KH
21You should have received a copy of the GNU General Public License
22along with GNU Emacs; see the file COPYING. If not, write to
23the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24Boston, MA 02111-1307, USA. */
4ed46869 25
68c45bf0
PE
26#ifdef emacs
27#include <config.h>
28#endif
29
4ed46869 30#include <stdio.h>
3263d5a2
KH
31#include <unistd.h>
32#include <ctype.h>
4ed46869
KH
33
34#ifdef emacs
35
36#include <sys/types.h>
4ed46869 37#include "lisp.h"
3263d5a2 38#include "character.h"
4ed46869
KH
39#include "charset.h"
40#include "coding.h"
fc6b09bf 41#include "disptab.h"
3263d5a2 42#include "buffer.h"
4ed46869
KH
43
44#else /* not emacs */
45
46#include "mulelib.h"
47
48#endif /* emacs */
49
4ed46869 50
3263d5a2 51/*** GENERAL NOTE on CODED CHARACTER SET (CHARSET) ***
4ed46869 52
3263d5a2
KH
53 A coded character set ("charset" hereafter) is a meaningful
54 collection (i.e. language, culture, functionality, etc) of
55 characters. Emacs handles multiple charsets at once. In Emacs Lisp
56 code, a charset is represented by symbol. In C code, a charset is
57 represented by its ID number or by a pointer the struct charset.
4ed46869 58
3263d5a2
KH
59 The actual information about each charset is stored in two places.
60 Lispy information is stored in the hash table Vcharset_hash_table as
61 a vector (charset attributes). The other information is stored in
62 charset_table as struct charset.
4ed46869 63
3263d5a2 64*/
4ed46869 65
3263d5a2
KH
66/* List of all charsets. This variable is used only from Emacs
67 Lisp. */
4ed46869
KH
68Lisp_Object Vcharset_list;
69
3263d5a2
KH
70/* Hash table that contains attributes of each charset. Keys are
71 charset symbols, and values are vectors of charset attributes. */
72Lisp_Object Vcharset_hash_table;
73
74/* Table of struct charset. */
75struct charset *charset_table;
76
77static int charset_table_size;
78int charset_table_used;
79
80Lisp_Object Qcharsetp;
81
82/* Special charset symbols. */
83Lisp_Object Qascii;
84Lisp_Object Qeight_bit_control;
85Lisp_Object Qeight_bit_graphic;
86Lisp_Object Qiso_8859_1;
87Lisp_Object Qunicode;
b0e3cf2b 88
3263d5a2
KH
89/* The corresponding charsets. */
90int charset_ascii;
91int charset_8_bit_control;
92int charset_8_bit_graphic;
93int charset_iso_8859_1;
94int charset_unicode;
c1a08b4c 95
3263d5a2
KH
96/* Value of charset attribute `charset-iso-plane'. */
97Lisp_Object Qgl, Qgr;
c1a08b4c 98
3263d5a2
KH
99/* The primary charset. It is a charset of unibyte characters. */
100int charset_primary;
101
102/* List of charsets ordered by the priority. */
103Lisp_Object Vcharset_ordered_list;
104
105/* List of iso-2022 charsets. */
106Lisp_Object Viso_2022_charset_list;
107
108/* List of emacs-mule charsets. */
109Lisp_Object Vemacs_mule_charset_list;
110
111struct charset *emacs_mule_charset[256];
4ed46869
KH
112
113/* Mapping table from ISO2022's charset (specified by DIMENSION,
114 CHARS, and FINAL-CHAR) to Emacs' charset. */
3263d5a2
KH
115int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
116
117Lisp_Object Vcharset_map_directory;
118
119Lisp_Object Vchar_unified_charset_table;
120
69f8de5b
KH
121#define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (code) - (charset)->min_code \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0])) \
3263d5a2
KH
135 : -1)
136
137
138/* Convert the character index IDX to code-point CODE for CHARSET.
139 It is assumed that IDX is in a valid range. */
140
141#define INDEX_TO_CODE_POINT(charset, idx) \
142 ((charset)->code_linear_p \
143 ? (idx) + (charset)->min_code \
144 : (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
147 << 8) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
150 << 16) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
152 << 24)))
4ed46869 153
3263d5a2 154\f
4ed46869 155
e9ce014c
KH
156/* Set to 1 to warn that a charset map is loaded and thus a buffer
157 text and a string data may be relocated. */
3263d5a2 158int charset_map_loaded;
35e623fb 159
e9ce014c
KH
160struct charset_map_entries
161{
162 struct {
163 unsigned from, to;
164 int c;
165 } entry[0x10000];
166 struct charset_map_entries *next;
167};
168
169/* Load the mapping information for CHARSET from ENTRIES.
4cf9710d 170
3263d5a2 171 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
8a73a704 172
3263d5a2
KH
173 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
174 CHARSET->decoder, and CHARSET->encoder.
93bcb785 175
3263d5a2
KH
176 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
177 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
178 setup it too. */
4ed46869 179
3263d5a2 180static void
e9ce014c 181load_charset_map (charset, entries, n_entries, control_flag)
3263d5a2 182 struct charset *charset;
e9ce014c
KH
183 struct charset_map_entries *entries;
184 int n_entries;
3263d5a2 185 int control_flag;
4ed46869 186{
3263d5a2
KH
187 Lisp_Object vec, table;
188 unsigned min_code = CHARSET_MIN_CODE (charset);
189 unsigned max_code = CHARSET_MAX_CODE (charset);
190 int ascii_compatible_p = charset->ascii_compatible_p;
191 int min_char, max_char, nonascii_min_char;
3263d5a2
KH
192 int i;
193 int first;
194 unsigned char *fast_map = charset->fast_map;
99529c2c 195
e9ce014c
KH
196 if (n_entries <= 0)
197 return;
198
199 if (control_flag > 0)
8ac5a9cc 200 {
3263d5a2
KH
201 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
202 unsigned invalid_code = CHARSET_INVALID_CODE (charset);
6662e69b 203
3263d5a2
KH
204 table = Fmake_char_table (Qnil, make_number (invalid_code));
205 if (control_flag == 1)
206 vec = Fmake_vector (make_number (n), make_number (-1));
207 else if (! CHAR_TABLE_P (Vchar_unify_table))
208 Vchar_unify_table = Fmake_char_table (Qnil, make_number (-1));
6662e69b 209
3263d5a2 210 charset_map_loaded = 1;
2e344af3 211 }
3263d5a2 212
e9ce014c 213 min_char = max_char = entries->entry[0].c;
3263d5a2 214 nonascii_min_char = MAX_CHAR;
e9ce014c 215 for (i = 0; i < n_entries; i++)
2e344af3 216 {
e9ce014c 217 unsigned from, to;
3263d5a2 218 int c, char_index;
e9ce014c 219 int idx = i % 0x10000;
3263d5a2 220
e9ce014c
KH
221 if (i > 0 && idx == 0)
222 entries = entries->next;
223 from = entries->entry[idx].from;
224 to = entries->entry[idx].to;
225 c = entries->entry[idx].c;
3263d5a2 226
3263d5a2
KH
227 if (control_flag < 2)
228 {
e9ce014c 229 if (control_flag == 1)
3263d5a2 230 {
e9ce014c
KH
231 unsigned code = from;
232 int from_index, to_index;
233
234 from_index = CODE_POINT_TO_INDEX (charset, from);
235 if (from == to)
236 to_index = from_index;
237 else
238 to_index = CODE_POINT_TO_INDEX (charset, to);
239 if (from_index < 0 || to_index < 0)
240 continue;
241 if (CHARSET_COMPACT_CODES_P (charset))
242 while (1)
243 {
244 ASET (vec, from_index, make_number (c));
245 CHAR_TABLE_SET (table, c, make_number (code));
246 if (from_index == to_index)
247 break;
248 from_index++, c++;
249 code = INDEX_TO_CODE_POINT (charset, from_index);
250 }
251 else
252 for (; from_index <= to_index; from_index++, c++)
253 {
254 ASET (vec, from_index, make_number (c));
255 CHAR_TABLE_SET (table, c, make_number (from_index));
256 }
3263d5a2 257 }
e9ce014c
KH
258
259 if (c > max_char)
3263d5a2
KH
260 max_char = c;
261 else if (c < min_char)
262 min_char = c;
263 if (ascii_compatible_p && ! ASCII_BYTE_P (c)
264 && c < nonascii_min_char)
265 nonascii_min_char = c;
266
267 CHARSET_FAST_MAP_SET (c, fast_map);
268 }
e9ce014c 269 else
2e344af3 270 {
69f8de5b
KH
271 unsigned code = from;
272 int from_index, to_index;
e9ce014c 273
69f8de5b
KH
274 from_index = CODE_POINT_TO_INDEX (charset, from);
275 if (from == to)
276 to_index = from_index;
277 else
278 to_index = CODE_POINT_TO_INDEX (charset, to);
279 if (from_index < 0 || to_index < 0)
280 continue;
281 while (1)
282 {
283 int c1 = DECODE_CHAR (charset, code);
284
3263d5a2
KH
285 if (c1 >= 0)
286 {
287 CHAR_TABLE_SET (table, c, make_number (c1));
288 CHAR_TABLE_SET (Vchar_unify_table, c1, c);
289 if (CHAR_TABLE_P (Vchar_unified_charset_table))
290 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
291 CHARSET_NAME (charset));
292 }
69f8de5b
KH
293 if (from_index == to_index)
294 break;
295 from_index++, c++;
296 code = INDEX_TO_CODE_POINT (charset, from_index);
3263d5a2 297 }
2e344af3 298 }
8ac5a9cc 299 }
3263d5a2
KH
300
301 if (control_flag < 2)
4ed46869 302 {
3263d5a2
KH
303 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
304 ? nonascii_min_char : min_char);
305 CHARSET_MAX_CHAR (charset) = max_char;
e9ce014c 306 if (control_flag == 1)
4ed46869 307 {
3263d5a2
KH
308 CHARSET_DECODER (charset) = vec;
309 CHARSET_ENCODER (charset) = table;
4ed46869
KH
310 }
311 }
2e344af3 312 else
3263d5a2 313 CHARSET_DEUNIFIER (charset) = table;
4ed46869
KH
314}
315
12bcae05 316
3263d5a2
KH
317/* Read a hexadecimal number (preceded by "0x") from the file FP while
318 paying attention to comment charcter '#'. */
12bcae05 319
3263d5a2
KH
320static INLINE unsigned
321read_hex (fp, eof)
322 FILE *fp;
323 int *eof;
12bcae05 324{
3263d5a2
KH
325 int c;
326 unsigned n;
12bcae05 327
3263d5a2
KH
328 while ((c = getc (fp)) != EOF)
329 {
69f8de5b 330 if (c == '#')
3263d5a2
KH
331 {
332 while ((c = getc (fp)) != EOF && c != '\n');
333 }
334 else if (c == '0')
335 {
336 if ((c = getc (fp)) == EOF || c == 'x')
337 break;
338 }
339 }
340 if (c == EOF)
341 {
342 *eof = 1;
343 return 0;
344 }
345 *eof = 0;
346 n = 0;
347 if (c == 'x')
348 while ((c = getc (fp)) != EOF && isxdigit (c))
349 n = ((n << 4)
350 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
351 else
352 while ((c = getc (fp)) != EOF && isdigit (c))
353 n = (n * 10) + c - '0';
e9ce014c
KH
354 if (c != EOF)
355 ungetc (c, fp);
3263d5a2
KH
356 return n;
357}
12bcae05 358
537efd8d 359
3263d5a2 360/* Return a mapping vector for CHARSET loaded from MAPFILE.
e9ce014c
KH
361 Each line of MAPFILE has this form
362 0xAAAA 0xCCCC
363 where 0xAAAA is a code-point and 0xCCCC is the corresponding
364 character code, or this form
365 0xAAAA-0xBBBB 0xCCCC
366 where 0xAAAA and 0xBBBB are code-points specifying a range, and
367 0xCCCC is the first character code of the range.
368
3263d5a2
KH
369 The returned vector has this form:
370 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
e9ce014c
KH
371 where CODE1 is a code-point or a cons of code-points specifying a
372 range. */
4ed46869 373
c449997d
KH
374extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
375
e9ce014c
KH
376static void
377load_charset_map_from_file (charset, mapfile, control_flag)
3263d5a2
KH
378 struct charset *charset;
379 Lisp_Object mapfile;
e9ce014c 380 int control_flag;
4ed46869 381{
e9ce014c
KH
382 unsigned min_code = CHARSET_MIN_CODE (charset);
383 unsigned max_code = CHARSET_MAX_CODE (charset);
3263d5a2
KH
384 int fd;
385 FILE *fp;
3263d5a2
KH
386 int eof;
387 Lisp_Object suffixes;
3263d5a2 388 int i;
e9ce014c
KH
389 struct charset_map_entries *head, *entries;
390 int n_entries;
4ed46869 391
3263d5a2
KH
392 suffixes = Fcons (build_string (".map"),
393 Fcons (build_string (".TXT"), Qnil));
4ed46869 394
3263d5a2
KH
395 fd = openp (Fcons (Vcharset_map_directory, Qnil), mapfile, suffixes,
396 NULL, 0);
397 if (fd < 0
398 || ! (fp = fdopen (fd, "r")))
399 {
400 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
e9ce014c 401 return;
3263d5a2 402 }
4ed46869 403
e9ce014c
KH
404 head = entries = ((struct charset_map_entries *)
405 alloca (sizeof (struct charset_map_entries)));
406 n_entries = 0;
3263d5a2
KH
407 eof = 0;
408 while (1)
409 {
e9ce014c
KH
410 unsigned from, to;
411 int c;
412 int idx;
4ed46869 413
e9ce014c 414 from = read_hex (fp, &eof);
3263d5a2
KH
415 if (eof)
416 break;
e9ce014c
KH
417 if (getc (fp) == '-')
418 to = read_hex (fp, &eof);
419 else
420 to = from;
421 c = (int) read_hex (fp, &eof);
422
423 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
424 continue;
425
426 if (n_entries > 0 && (n_entries % 0x10000) == 0)
3263d5a2 427 {
e9ce014c
KH
428 entries->next = ((struct charset_map_entries *)
429 alloca (sizeof (struct charset_map_entries)));
430 entries = entries->next;
3263d5a2 431 }
e9ce014c
KH
432 idx = n_entries % 0x10000;
433 entries->entry[idx].from = from;
434 entries->entry[idx].to = to;
435 entries->entry[idx].c = c;
436 n_entries++;
3263d5a2
KH
437 }
438 fclose (fp);
439 close (fd);
4ed46869 440
e9ce014c
KH
441 load_charset_map (charset, head, n_entries, control_flag);
442}
443
444static void
445load_charset_map_from_vector (charset, vec, control_flag)
446 struct charset *charset;
447 Lisp_Object vec;
448 int control_flag;
449{
450 unsigned min_code = CHARSET_MIN_CODE (charset);
451 unsigned max_code = CHARSET_MAX_CODE (charset);
452 struct charset_map_entries *head, *entries;
453 int n_entries;
454 int len = ASIZE (vec);
455 int i;
456
457 if (len % 2 == 1)
3263d5a2 458 {
e9ce014c
KH
459 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
460 return;
3263d5a2 461 }
4ed46869 462
e9ce014c
KH
463 head = entries = ((struct charset_map_entries *)
464 alloca (sizeof (struct charset_map_entries)));
465 n_entries = 0;
466 for (i = 0; i < len; i += 2)
467 {
468 Lisp_Object val, val2;
469 unsigned from, to;
470 int c;
471 int idx;
3263d5a2 472
e9ce014c
KH
473 val = AREF (vec, i);
474 if (CONSP (val))
475 {
476 val2 = XCDR (val);
477 val = XCAR (val);
478 CHECK_NATNUM (val);
479 CHECK_NATNUM (val2);
480 from = XFASTINT (val);
481 to = XFASTINT (val2);
482 }
483 else
484 {
485 CHECK_NATNUM (val);
486 from = to = XFASTINT (val);
487 }
488 val = AREF (vec, i + 1);
489 CHECK_NATNUM (val);
490 c = XFASTINT (val);
491
492 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
493 continue;
494
495 if ((n_entries % 0x10000) == 0)
496 {
497 entries->next = ((struct charset_map_entries *)
498 alloca (sizeof (struct charset_map_entries)));
499 entries = entries->next;
500 }
501 idx = n_entries % 0x10000;
502 entries->entry[idx].from = from;
503 entries->entry[idx].to = to;
504 entries->entry[idx].c = c;
505 n_entries++;
506 }
507
508 load_charset_map (charset, head, n_entries, control_flag);
ac4137cc
KH
509}
510
3263d5a2
KH
511static void
512load_charset (charset)
513 struct charset *charset;
ac4137cc 514{
3263d5a2
KH
515 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
516 {
517 Lisp_Object map;
ac4137cc 518
3263d5a2
KH
519 map = CHARSET_MAP (charset);
520 if (STRINGP (map))
e9ce014c
KH
521 load_charset_map_from_file (charset, map, 1);
522 else
523 load_charset_map_from_vector (charset, map, 1);
3263d5a2
KH
524 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
525 }
4ed46869
KH
526}
527
3263d5a2
KH
528
529DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
530 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
531 (object)
532 Lisp_Object object;
23d2a7f1 533{
3263d5a2 534 return (CHARSETP (object) ? Qt : Qnil);
23d2a7f1
KH
535}
536
35e623fb 537
3263d5a2
KH
538void
539map_charset_chars (c_function, function, charset_symbol, arg)
540 void (*c_function) (Lisp_Object, Lisp_Object, Lisp_Object);
541 Lisp_Object function, charset_symbol, arg;
35e623fb 542{
3263d5a2
KH
543 int id;
544 struct charset *charset;
545 Lisp_Object range;
d2665018 546
3263d5a2
KH
547 CHECK_CHARSET_GET_ID (charset_symbol, id);
548 charset = CHARSET_FROM_ID (id);
549
550 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
551 load_charset (charset);
552
553 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
554 {
555 range = Fcons (make_number (CHARSET_MIN_CHAR (charset)),
556 make_number (CHARSET_MAX_CHAR (charset)));
557 if (NILP (function))
558 (*c_function) (arg, range, Qnil);
559 else
560 call2 (function, range, arg);
561 }
562 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
563 {
564 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
565 return;
566 if (CHARSET_ASCII_COMPATIBLE_P (charset))
bbf12bb3 567 {
3263d5a2
KH
568 range = Fcons (make_number (0), make_number (127));
569 if (NILP (function))
570 (*c_function) (arg, range, Qnil);
571 else
572 call2 (function, range, arg);
bbf12bb3 573 }
3263d5a2
KH
574 map_char_table (c_function, function, CHARSET_ENCODER (charset), arg,
575 0, NULL);
576 }
577 else /* i.e. CHARSET_METHOD_PARENT */
578 {
579 int from, to, c;
580 unsigned code;
581 int i, j, k, l;
582 int *code_space = CHARSET_CODE_SPACE (charset);
583 Lisp_Object val;
584
585 range = Fcons (Qnil, Qnil);
586 from = to = -2;
587 for (i = code_space[12]; i <= code_space[13]; i++)
588 for (j = code_space[8]; j <= code_space[9]; j++)
589 for (k = code_space[4]; k <= code_space[5]; k++)
590 for (l = code_space[0]; l <= code_space[1]; l++)
591 {
592 code = (i << 24) | (j << 16) | (k << 8) | l;
593 c = DECODE_CHAR (charset, code);
594 if (c == to + 1)
595 {
596 to++;
597 continue;
598 }
599 if (from >= 0)
600 {
601 if (from < to)
602 {
603 XSETCAR (range, make_number (from));
604 XSETCDR (range, make_number (to));
605 val = range;
606 }
607 else
608 val = make_number (from);
609 if (NILP (function))
610 (*c_function) (arg, val, Qnil);
611 else
612 call2 (function, val, arg);
613 }
614 from = to = (c < 0 ? -2 : c);
615 }
616 if (from >= 0)
bbf12bb3 617 {
3263d5a2
KH
618 if (from < to)
619 {
620 XSETCAR (range, make_number (from));
621 XSETCDR (range, make_number (to));
622 val = range;
623 }
624 else
625 val = make_number (from);
626 if (NILP (function))
627 (*c_function) (arg, val, Qnil);
628 else
629 call2 (function, val, arg);
bbf12bb3 630 }
35e623fb 631 }
3263d5a2
KH
632}
633
634DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 3, 0,
635 doc: /* Call FUNCTION for each characters in CHARSET.
6abd9323 636FUNCTION is called with an argument RANGE and the 2nd optional
3263d5a2 637argument ARG.
6abd9323
KH
638
639RANGE is a cons (FROM . TO), where FROM and TO indicates a range of
640character sequence that are contained in CHARSET. */)
3263d5a2
KH
641 (function, charset, arg)
642 Lisp_Object function, charset, arg;
643{
644 map_charset_chars (NULL, function, charset, arg);
645 return Qnil;
35e623fb 646}
76d7b829
KH
647
648
3263d5a2
KH
649/* Define a charset according to the arguments. The Nth argument is
650 the Nth attribute of the charset (the last attribute `charset-id'
651 is not included). See the docstring of `define-charset' for the
652 detail. */
76d7b829 653
3263d5a2
KH
654DEFUN ("define-charset-internal", Fdefine_charset_internal,
655 Sdefine_charset_internal, charset_arg_max, MANY, 0,
656 doc: /* For internal use only. */)
657 (nargs, args)
658 int nargs;
659 Lisp_Object *args;
76d7b829 660{
3263d5a2
KH
661 /* Charset attr vector. */
662 Lisp_Object attrs;
663 Lisp_Object val;
664 unsigned hash_code;
665 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
69f8de5b 666 int i, j;
3263d5a2
KH
667 struct charset charset;
668 int id;
669 int dimension;
670 int new_definition_p;
671 int nchars;
672
673 if (nargs != charset_arg_max)
674 return Fsignal (Qwrong_number_of_arguments,
675 Fcons (intern ("define-charset-internal"),
676 make_number (nargs)));
677
678 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
679
680 CHECK_SYMBOL (args[charset_arg_name]);
681 ASET (attrs, charset_name, args[charset_arg_name]);
682
683 val = args[charset_arg_code_space];
684 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
76d7b829 685 {
3263d5a2
KH
686 int min_byte, max_byte;
687
688 min_byte = XINT (Faref (val, make_number (i * 2)));
689 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
690 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
691 error ("Invalid :code-space value");
692 charset.code_space[i * 4] = min_byte;
693 charset.code_space[i * 4 + 1] = max_byte;
694 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
695 nchars *= charset.code_space[i * 4 + 2];
696 charset.code_space[i * 4 + 3] = nchars;
697 if (max_byte > 0)
698 dimension = i + 1;
699 }
76d7b829 700
3263d5a2
KH
701 val = args[charset_arg_dimension];
702 if (NILP (val))
703 charset.dimension = dimension;
704 else
705 {
706 CHECK_NATNUM (val);
707 charset.dimension = XINT (val);
708 if (charset.dimension < 1 || charset.dimension > 4)
709 args_out_of_range_3 (val, make_number (1), make_number (4));
710 }
711
712 charset.code_linear_p
713 = (charset.dimension == 1
714 || (charset.code_space[2] == 256
715 && (charset.dimension == 2
716 || (charset.code_space[6] == 256
717 && (charset.dimension == 3
718 || charset.code_space[10] == 256)))));
719
69f8de5b
KH
720 if (! charset.code_linear_p)
721 {
722 charset.code_space_mask = (unsigned char *) xmalloc (256);
723 bzero (charset.code_space_mask, sizeof (charset.code_space_mask));
724 for (i = 0; i < 4; i++)
725 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
726 j++)
727 charset.code_space_mask[j] |= (1 << i);
728 }
729
3263d5a2
KH
730 charset.iso_chars_96 = charset.code_space[2] == 96;
731
732 charset.min_code = (charset.code_space[0]
733 | (charset.code_space[4] << 8)
734 | (charset.code_space[8] << 16)
735 | (charset.code_space[12] << 24));
736 charset.max_code = (charset.code_space[1]
737 | (charset.code_space[5] << 8)
738 | (charset.code_space[9] << 16)
739 | (charset.code_space[13] << 24));
740
e9ce014c
KH
741 charset.compact_codes_p = charset.max_code < 0x1000000;
742
3263d5a2
KH
743 val = args[charset_arg_invalid_code];
744 if (NILP (val))
745 {
746 if (charset.min_code > 0)
747 charset.invalid_code = 0;
bbf12bb3
KH
748 else
749 {
3263d5a2
KH
750 XSETINT (val, charset.max_code + 1);
751 if (XINT (val) == charset.max_code + 1)
752 charset.invalid_code = charset.max_code + 1;
753 else
754 error ("Attribute :invalid-code must be specified");
76d7b829 755 }
76d7b829 756 }
3263d5a2
KH
757 else
758 {
759 CHECK_NATNUM (val);
760 charset.invalid_code = XFASTINT (val);
761 }
76d7b829 762
3263d5a2
KH
763 val = args[charset_arg_iso_final];
764 if (NILP (val))
765 charset.iso_final = -1;
766 else
767 {
768 CHECK_NUMBER (val);
769 if (XINT (val) < '0' || XINT (val) > 127)
770 error ("Invalid iso-final-char: %d", XINT (val));
771 charset.iso_final = XINT (val);
772 }
773
774 val = args[charset_arg_iso_revision];
775 if (NILP (val))
776 charset.iso_revision = -1;
777 else
4ed46869 778 {
3263d5a2
KH
779 CHECK_NUMBER (val);
780 if (XINT (val) > 63)
781 args_out_of_range (make_number (63), val);
782 charset.iso_revision = XINT (val);
4ed46869 783 }
3263d5a2
KH
784
785 val = args[charset_arg_emacs_mule_id];
786 if (NILP (val))
787 charset.emacs_mule_id = -1;
4ed46869
KH
788 else
789 {
3263d5a2
KH
790 CHECK_NATNUM (val);
791 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
792 error ("Invalid emacs-mule-id: %d", XINT (val));
793 charset.emacs_mule_id = XINT (val);
c83ef371 794 }
6ef23ebb 795
3263d5a2 796 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
4ed46869 797
3263d5a2
KH
798 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
799
800 charset.unified_p = 0;
801
802 bzero (charset.fast_map, sizeof (charset.fast_map));
803
804 if (! NILP (args[charset_arg_code_offset]))
805 {
806 val = args[charset_arg_code_offset];
807 CHECK_NUMBER (val);
808
809 charset.method = CHARSET_METHOD_OFFSET;
810 charset.code_offset = XINT (val);
811
812 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
813 charset.min_char = i + charset.code_offset;
814 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
815 charset.max_char = i + charset.code_offset;
816 if (charset.max_char > MAX_CHAR)
817 error ("Unsupported max char: %d", charset.max_char);
818
819 for (i = charset.min_char; i < 0x10000 && i <= charset.max_char;
820 i += 128)
821 CHARSET_FAST_MAP_SET (i, charset.fast_map);
822 for (; i <= charset.max_char; i += 0x1000)
823 CHARSET_FAST_MAP_SET (i, charset.fast_map);
824 }
825 else if (! NILP (args[charset_arg_map]))
826 {
827 val = args[charset_arg_map];
828 ASET (attrs, charset_map, val);
829 if (STRINGP (val))
e9ce014c
KH
830 load_charset_map_from_file (&charset, val, 0);
831 else
832 load_charset_map_from_vector (&charset, val, 0);
3263d5a2
KH
833 charset.method = CHARSET_METHOD_MAP_DEFERRED;
834 }
835 else if (! NILP (args[charset_arg_parents]))
836 {
837 val = args[charset_arg_parents];
838 CHECK_LIST (val);
839 charset.method = CHARSET_METHOD_INHERIT;
840 val = Fcopy_sequence (val);
841 ASET (attrs, charset_parents, val);
842
843 charset.min_char = MAX_CHAR;
844 charset.max_char = 0;
845 for (; ! NILP (val); val = Fcdr (val))
4ed46869 846 {
3263d5a2
KH
847 Lisp_Object elt, car_part, cdr_part;
848 int this_id, offset;
849 struct charset *this_charset;
850
851 elt = Fcar (val);
852 if (CONSP (elt))
853 {
854 car_part = XCAR (elt);
855 cdr_part = XCDR (elt);
856 CHECK_CHARSET_GET_ID (car_part, this_id);
857 CHECK_NUMBER (cdr_part);
858 offset = XINT (cdr_part);
859 }
860 else
4ed46869 861 {
3263d5a2
KH
862 CHECK_CHARSET_GET_ID (elt, this_id);
863 offset = 0;
4ed46869 864 }
3263d5a2
KH
865 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
866
867 this_charset = CHARSET_FROM_ID (this_id);
868 if (charset.min_char > this_charset->min_char)
869 charset.min_char = this_charset->min_char;
870 if (charset.max_char < this_charset->max_char)
871 charset.max_char = this_charset->max_char;
872 for (i = 0; i < 190; i++)
873 charset.fast_map[i] |= this_charset->fast_map[i];
4ed46869 874 }
4ed46869 875 }
3263d5a2
KH
876 else
877 error ("None of :code-offset, :map, :parents are specified");
4ed46869 878
3263d5a2
KH
879 val = args[charset_arg_unify_map];
880 if (! NILP (val) && !STRINGP (val))
881 CHECK_VECTOR (val);
882 ASET (attrs, charset_unify_map, val);
4ed46869 883
3263d5a2
KH
884 CHECK_LIST (args[charset_arg_plist]);
885 ASET (attrs, charset_plist, args[charset_arg_plist]);
4ed46869 886
3263d5a2
KH
887 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
888 &hash_code);
889 if (charset.hash_index >= 0)
890 {
891 new_definition_p = 0;
892 HASH_VALUE (hash_table, charset.hash_index) = attrs;
893 }
1a45ff10 894 else
3263d5a2
KH
895 {
896 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
897 hash_code);
898 if (charset_table_used == charset_table_size)
899 {
900 charset_table_size += 256;
901 charset_table
902 = ((struct charset *)
903 xrealloc (charset_table,
904 sizeof (struct charset) * charset_table_size));
905 }
906 id = charset_table_used++;
907 ASET (attrs, charset_id, make_number (id));
908 new_definition_p = 1;
909 }
4ed46869 910
4ed46869 911
3263d5a2
KH
912 charset.id = id;
913 charset_table[id] = charset;
914
915 if (charset.iso_final >= 0)
4ed46869 916 {
3263d5a2
KH
917 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
918 charset.iso_final) = id;
919 if (new_definition_p)
920 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
921 Fcons (make_number (id), Qnil));
4ed46869 922 }
3263d5a2
KH
923
924 if (charset.emacs_mule_id >= 0)
4ed46869 925 {
3263d5a2
KH
926 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
927 if (new_definition_p)
928 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
929 Fcons (make_number (id), Qnil));
4ed46869
KH
930 }
931
3263d5a2
KH
932 if (new_definition_p)
933 {
934 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
935 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
936 Fcons (make_number (id), Qnil));
937 }
4ed46869 938
3263d5a2 939 return Qnil;
4ed46869
KH
940}
941
3263d5a2
KH
942
943DEFUN ("define-charset-alias", Fdefine_charset_alias,
944 Sdefine_charset_alias, 2, 2, 0,
945 doc: /* Define ALIAS as an alias for charset CHARSET. */)
946 (alias, charset)
947 Lisp_Object alias, charset;
4ed46869 948{
3263d5a2
KH
949 Lisp_Object attr;
950
951 CHECK_CHARSET_GET_ATTR (charset, attr);
952 Fputhash (alias, attr, Vcharset_hash_table);
953 return Qnil;
954}
4ed46869 955
4ed46869 956
3263d5a2
KH
957DEFUN ("primary-charset", Fprimary_charset, Sprimary_charset, 0, 0, 0,
958 doc: /* Return the primary charset. */)
959 ()
960{
961 return CHARSET_NAME (CHARSET_FROM_ID (charset_primary));
962}
4ed46869 963
4ed46869 964
3263d5a2
KH
965DEFUN ("set-primary-charset", Fset_primary_charset, Sset_primary_charset,
966 1, 1, 0,
967 doc: /* Set the primary charset to CHARSET. */)
968 (charset)
969 Lisp_Object charset;
970{
971 int id;
972
973 CHECK_CHARSET_GET_ID (charset, id);
974 charset_primary = id;
4ed46869
KH
975 return Qnil;
976}
977
3263d5a2
KH
978
979DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
980 doc: /* Return a property list of CHARSET. */)
981 (charset)
982 Lisp_Object charset;
983{
984 Lisp_Object attrs;
985
986 CHECK_CHARSET_GET_ATTR (charset, attrs);
987 return CHARSET_ATTR_PLIST (attrs);
988}
989
990
991DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
992 doc: /* Set CHARSET's property list to PLIST. */)
993 (charset, plist)
994 Lisp_Object charset, plist;
995{
996 Lisp_Object attrs;
997
998 CHECK_CHARSET_GET_ATTR (charset, attrs);
999 CHARSET_ATTR_PLIST (attrs) = plist;
1000 return plist;
1001}
1002
1003
1004DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 2, 0,
1005 doc: /* Unify characters of CHARSET with Unicode. */)
1006 (charset, unify_map)
1007 Lisp_Object charset, unify_map;
8a73a704 1008{
3263d5a2
KH
1009 int id;
1010 struct charset *cs;
1011
1012 CHECK_CHARSET_GET_ID (charset, id);
1013 cs = CHARSET_FROM_ID (id);
1014 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1015 load_charset (cs);
1016 if (CHARSET_UNIFIED_P (cs)
1017 && CHAR_TABLE_P (CHARSET_DEUNIFIER (cs)))
1018 return Qnil;
1019 CHARSET_UNIFIED_P (cs) = 0;
1020 if (NILP (unify_map))
1021 unify_map = CHARSET_UNIFY_MAP (cs);
1022 if (STRINGP (unify_map))
e9ce014c
KH
1023 load_charset_map_from_file (cs, unify_map, 2);
1024 else
1025 load_charset_map_from_vector (cs, unify_map, 2);
3263d5a2
KH
1026 CHARSET_UNIFIED_P (cs) = 1;
1027 return Qnil;
8a73a704
KH
1028}
1029
3fac5a51
KH
1030DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1031 Sget_unused_iso_final_char, 2, 2, 0,
3263d5a2
KH
1032 doc: /*
1033Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
fdb82f93
PJ
1034DIMENSION is the number of bytes to represent a character: 1 or 2.
1035CHARS is the number of characters in a dimension: 94 or 96.
1036
1037This final char is for private use, thus the range is `0' (48) .. `?' (63).
3263d5a2 1038If there's no unused final char for the attrified kind of charset,
fdb82f93
PJ
1039return nil. */)
1040 (dimension, chars)
3fac5a51
KH
1041 Lisp_Object dimension, chars;
1042{
1043 int final_char;
1044
b7826503
PJ
1045 CHECK_NUMBER (dimension);
1046 CHECK_NUMBER (chars);
3263d5a2
KH
1047 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1048 args_out_of_range_3 (dimension, make_number (1), make_number (3));
3fac5a51 1049 if (XINT (chars) != 94 && XINT (chars) != 96)
3263d5a2 1050 args_out_of_range_3 (chars, make_number (94), make_number (96));
3fac5a51 1051 for (final_char = '0'; final_char <= '?'; final_char++)
3263d5a2
KH
1052 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1053 break;
3fac5a51
KH
1054 return (final_char <= '?' ? make_number (final_char) : Qnil);
1055}
1056
3263d5a2
KH
1057static void
1058check_iso_charset_parameter (dimension, chars, final_char)
1059 Lisp_Object dimension, chars, final_char;
4ed46869 1060{
3263d5a2
KH
1061 CHECK_NATNUM (dimension);
1062 CHECK_NATNUM (chars);
1063 CHECK_NATNUM (final_char);
4ed46869 1064
3263d5a2
KH
1065 if (XINT (dimension) > 3)
1066 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
4ed46869
KH
1067 if (XINT (chars) != 94 && XINT (chars) != 96)
1068 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
3263d5a2 1069 if (XINT (final_char) < '0' || XINT (final_char) > '~')
4ed46869 1070 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
3263d5a2
KH
1071}
1072
1073
1074DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1075 4, 4, 0,
1076 doc: /*
1077Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
1078CHARSET should be defined by `defined-charset' in advance. */)
1079 (dimension, chars, final_char, charset)
1080 Lisp_Object dimension, chars, final_char, charset;
1081{
1082 int id;
4ed46869 1083
3263d5a2
KH
1084 CHECK_CHARSET_GET_ID (charset, id);
1085 check_iso_charset_parameter (dimension, chars, final_char);
1086
1087 ISO_CHARSET_TABLE (dimension, chars, final_char) = id;
4ed46869
KH
1088 return Qnil;
1089}
1090
3263d5a2 1091
2e344af3
KH
1092/* Return information about charsets in the text at PTR of NBYTES
1093 bytes, which are NCHARS characters. The value is:
f6302ac9 1094
cfe34140 1095 0: Each character is represented by one byte. This is always
3263d5a2
KH
1096 true for a unibyte string. For a multibyte string, true if
1097 it contains only ASCII characters.
1098
1099 1: No charsets other than ascii, eight-bit-control, and
1100 latin-1 are found.
1d67c29b 1101
3263d5a2
KH
1102 2: Otherwise.
1103*/
4ed46869
KH
1104
1105int
3263d5a2
KH
1106string_xstring_p (string)
1107 Lisp_Object string;
4ed46869 1108{
3263d5a2
KH
1109 unsigned char *p = XSTRING (string)->data;
1110 unsigned char *endp = p + STRING_BYTES (XSTRING (string));
1111 struct charset *charset;
1112
1113 if (XSTRING (string)->size == STRING_BYTES (XSTRING (string)))
1114 return 0;
1115
1116 charset = CHARSET_FROM_ID (charset_iso_8859_1);
1117 while (p < endp)
0282eb69 1118 {
3263d5a2 1119 int c = STRING_CHAR_ADVANCE (p);
2e344af3 1120
3263d5a2
KH
1121 if (ENCODE_CHAR (charset, c) < 0)
1122 return 2;
0282eb69 1123 }
3263d5a2
KH
1124 return 1;
1125}
05505664 1126
05505664 1127
3263d5a2 1128/* Find charsets in the string at PTR of NCHARS and NBYTES.
4ed46869 1129
3263d5a2
KH
1130 CHARSETS is a vector. Each element is a cons of CHARSET and
1131 FOUND-FLAG. CHARSET is a charset id, and FOUND-FLAG is nil or t.
1132 FOUND-FLAG t (or nil) means that the corresponding charset is
1133 already found (or not yet found).
2e344af3 1134
3263d5a2 1135 It may lookup a translation table TABLE if supplied. */
2e344af3 1136
3263d5a2
KH
1137static void
1138find_charsets_in_text (ptr, nchars, nbytes, charsets, table)
1139 unsigned char *ptr;
1140 int nchars, nbytes;
1141 Lisp_Object charsets, table;
1142{
1143 unsigned char *pend = ptr + nbytes;
1144 int ncharsets = ASIZE (charsets);
1145
1146 if (nchars == nbytes)
1147 return;
1148
1149 while (ptr < pend)
1150 {
1151 int c = STRING_CHAR_ADVANCE (ptr);
1152 int i;
1153 int all_found = 1;
1154 Lisp_Object elt;
1155
1156 if (!NILP (table))
1157 c = translate_char (table, c);
1158 for (i = 0; i < ncharsets; i++)
1159 {
1160 elt = AREF (charsets, i);
1161 if (NILP (XCDR (elt)))
1162 {
1163 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (elt)));
1164
1165 if (ENCODE_CHAR (charset, c) != CHARSET_INVALID_CODE (charset))
1166 XCDR (elt) = Qt;
1167 else
1168 all_found = 0;
1169 }
4ed46869 1170 }
3263d5a2
KH
1171 if (all_found)
1172 break;
4ed46869 1173 }
4ed46869
KH
1174}
1175
3263d5a2 1176
4ed46869 1177DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
23d2a7f1 1178 2, 3, 0,
fdb82f93
PJ
1179 doc: /* Return a list of charsets in the region between BEG and END.
1180BEG and END are buffer positions.
1181Optional arg TABLE if non-nil is a translation table to look up.
1182
1183If the region contains invalid multibyte characters,
1184`unknown' is included in the returned list.
1185
1186If the current buffer is unibyte, the returned list may contain
1187only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1188 (beg, end, table)
23d2a7f1 1189 Lisp_Object beg, end, table;
4ed46869 1190{
3263d5a2 1191 Lisp_Object charsets;
6ae1f27e 1192 int from, from_byte, to, stop, stop_byte, i;
4ed46869
KH
1193 Lisp_Object val;
1194
1195 validate_region (&beg, &end);
1196 from = XFASTINT (beg);
1197 stop = to = XFASTINT (end);
6ae1f27e 1198
4ed46869 1199 if (from < GPT && GPT < to)
6ae1f27e
RS
1200 {
1201 stop = GPT;
1202 stop_byte = GPT_BYTE;
1203 }
1204 else
1205 stop_byte = CHAR_TO_BYTE (stop);
1206
1207 from_byte = CHAR_TO_BYTE (from);
1208
3263d5a2
KH
1209 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1210 for (i = 0; i < charset_table_used; i++)
1211 ASET (charsets, i, Fcons (make_number (i), Qnil));
1212
4ed46869
KH
1213 while (1)
1214 {
3263d5a2
KH
1215 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1216 stop_byte - from_byte, charsets, table);
4ed46869 1217 if (stop < to)
6ae1f27e
RS
1218 {
1219 from = stop, from_byte = stop_byte;
1220 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1221 }
4ed46869
KH
1222 else
1223 break;
1224 }
6ae1f27e 1225
4ed46869 1226 val = Qnil;
3263d5a2
KH
1227 for (i = charset_table_used - 1; i >= 0; i--)
1228 if (!NILP (XCDR (AREF (charsets, i))))
1229 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1230 return val;
1231}
1232
1233DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
23d2a7f1 1234 1, 2, 0,
fdb82f93
PJ
1235 doc: /* Return a list of charsets in STR.
1236Optional arg TABLE if non-nil is a translation table to look up.
1237
1238If the string contains invalid multibyte characters,
1239`unknown' is included in the returned list.
1240
1241If STR is unibyte, the returned list may contain
3263d5a2 1242only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
fdb82f93 1243 (str, table)
23d2a7f1 1244 Lisp_Object str, table;
4ed46869 1245{
3263d5a2 1246 Lisp_Object charsets;
4ed46869
KH
1247 int i;
1248 Lisp_Object val;
1249
b7826503 1250 CHECK_STRING (str);
87b089ad 1251
3263d5a2
KH
1252 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1253 find_charsets_in_text (XSTRING (str)->data, XSTRING (str)->size,
1254 STRING_BYTES (XSTRING (str)), charsets, table);
2e344af3 1255
4ed46869 1256 val = Qnil;
3263d5a2
KH
1257 for (i = charset_table_used - 1; i >= 0; i--)
1258 if (!NILP (XCDR (AREF (charsets, i))))
1259 val = Fcons (CHARSET_NAME (charset_table + i), val);
4ed46869
KH
1260 return val;
1261}
2e344af3 1262
4ed46869 1263\f
3263d5a2
KH
1264
1265/* Return a character correponding to the code-point CODE of
1266 CHARSET. */
1267
1268int
1269decode_char (charset, code)
1270 struct charset *charset;
1271 unsigned code;
4ed46869 1272{
3263d5a2
KH
1273 int c, char_index;
1274 enum charset_method method = CHARSET_METHOD (charset);
ac4137cc 1275
3263d5a2
KH
1276 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1277 return -1;
4ed46869 1278
3263d5a2 1279 if (method == CHARSET_METHOD_MAP_DEFERRED)
ac4137cc 1280 {
3263d5a2
KH
1281 load_charset (charset);
1282 method = CHARSET_METHOD (charset);
ac4137cc 1283 }
4ed46869 1284
3263d5a2 1285 if (method == CHARSET_METHOD_INHERIT)
2e344af3 1286 {
3263d5a2 1287 Lisp_Object parents;
4ed46869 1288
3263d5a2
KH
1289 parents = CHARSET_PARENTS (charset);
1290 c = -1;
1291 for (; CONSP (parents); parents = XCDR (parents))
1292 {
1293 int id = XINT (XCAR (XCAR (parents)));
1294 int code_offset = XINT (XCDR (XCAR (parents)));
1295 unsigned this_code = code + code_offset;
9d3d8cba 1296
3263d5a2
KH
1297 charset = CHARSET_FROM_ID (id);
1298 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1299 break;
1300 }
1301 }
1302 else
ac4137cc 1303 {
3263d5a2 1304 char_index = CODE_POINT_TO_INDEX (charset, code);
69f8de5b
KH
1305 if (char_index < 0)
1306 return -1;
3263d5a2
KH
1307
1308 if (method == CHARSET_METHOD_MAP)
ac4137cc 1309 {
3263d5a2
KH
1310 Lisp_Object decoder;
1311
1312 decoder = CHARSET_DECODER (charset);
1313 if (! VECTORP (decoder))
1314 return -1;
1315 c = XINT (AREF (decoder, char_index));
ac4137cc
KH
1316 }
1317 else
1318 {
3263d5a2 1319 c = char_index + CHARSET_CODE_OFFSET (charset);
ac4137cc
KH
1320 }
1321 }
9d3d8cba 1322
3263d5a2
KH
1323 if (CHARSET_UNIFIED_P (charset)
1324 && c >= 0)
c449997d
KH
1325 {
1326 MAYBE_UNIFY_CHAR (c);
1327 }
d2665018 1328
3263d5a2 1329 return c;
d2665018
KH
1330}
1331
1bcc1567 1332
3263d5a2
KH
1333/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1334 CHARSET, return CHARSET_INVALID_CODE (CHARSET). */
1bcc1567 1335
3263d5a2
KH
1336unsigned
1337encode_char (charset, c)
1338 struct charset *charset;
9b6a601f
KH
1339 int c;
1340{
3263d5a2
KH
1341 unsigned code;
1342 enum charset_method method = CHARSET_METHOD (charset);
8ac5a9cc 1343
3263d5a2 1344 if (CHARSET_UNIFIED_P (charset))
4ed46869 1345 {
3263d5a2
KH
1346 Lisp_Object deunifier;
1347 int deunified;
4ed46869 1348
3263d5a2
KH
1349 deunifier = CHARSET_DEUNIFIER (charset);
1350 if (! CHAR_TABLE_P (deunifier))
1351 {
1352 Funify_charset (CHARSET_NAME (charset), Qnil);
1353 deunifier = CHARSET_DEUNIFIER (charset);
1354 }
1355 deunified = XINT (CHAR_TABLE_REF (deunifier, c));
1356 if (deunified > 0)
1357 c = deunified;
4ed46869 1358 }
beeedaad 1359
3263d5a2
KH
1360 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1361 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1362 return CHARSET_INVALID_CODE (charset);
beeedaad 1363
3263d5a2 1364 if (method == CHARSET_METHOD_INHERIT)
859f2b3c 1365 {
3263d5a2 1366 Lisp_Object parents;
859f2b3c 1367
3263d5a2
KH
1368 parents = CHARSET_PARENTS (charset);
1369 for (; CONSP (parents); parents = XCDR (parents))
beeedaad 1370 {
3263d5a2
KH
1371 int id = XINT (XCAR (XCAR (parents)));
1372 int code_offset = XINT (XCDR (XCAR (parents)));
1373 struct charset *this_charset = CHARSET_FROM_ID (id);
beeedaad 1374
3263d5a2
KH
1375 code = ENCODE_CHAR (this_charset, c);
1376 if (code != CHARSET_INVALID_CODE (this_charset)
1377 && (code_offset < 0 || code >= code_offset))
1378 {
1379 code -= code_offset;
69f8de5b
KH
1380 if (code >= charset->min_code && code <= charset->max_code
1381 && CODE_POINT_TO_INDEX (charset, code) >= 0)
3263d5a2
KH
1382 return code;
1383 }
beeedaad 1384 }
3263d5a2
KH
1385 return CHARSET_INVALID_CODE (charset);
1386 }
99529c2c 1387
3263d5a2 1388 if (method == CHARSET_METHOD_MAP_DEFERRED)
beeedaad 1389 {
3263d5a2
KH
1390 load_charset (charset);
1391 method = CHARSET_METHOD (charset);
859f2b3c 1392 }
beeedaad 1393
3263d5a2 1394 if (method == CHARSET_METHOD_MAP)
3f62427c 1395 {
3263d5a2 1396 Lisp_Object encoder;
beeedaad 1397 Lisp_Object val;
beeedaad 1398
3263d5a2
KH
1399 encoder = CHARSET_ENCODER (charset);
1400 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1401 return CHARSET_INVALID_CODE (charset);
1402 val = CHAR_TABLE_REF (encoder, c);
e9ce014c
KH
1403 code = XINT (val);
1404 if (! CHARSET_COMPACT_CODES_P (charset))
1405 code = INDEX_TO_CODE_POINT (charset, code);
3263d5a2
KH
1406 }
1407 else
beeedaad 1408 {
3263d5a2
KH
1409 code = c - CHARSET_CODE_OFFSET (charset);
1410 code = INDEX_TO_CODE_POINT (charset, code);
3f62427c 1411 }
beeedaad 1412
3263d5a2 1413 return code;
3f62427c
KH
1414}
1415
4ed46869 1416
3263d5a2
KH
1417DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1418 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1419Return nil if CODE-POINT is not valid in CHARSET.
4ed46869 1420
3263d5a2
KH
1421CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1422
1423Optional argument RESTRICTION specifies a way to map the pair of CCS
1424and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1425 (charset, code_point, restriction)
1426 Lisp_Object charset, code_point, restriction;
4ed46869 1427{
3263d5a2
KH
1428 int c, id;
1429 unsigned code;
1430 struct charset *charsetp;
4ed46869 1431
3263d5a2
KH
1432 CHECK_CHARSET_GET_ID (charset, id);
1433 if (CONSP (code_point))
1434 {
1435 CHECK_NATNUM (XCAR (code_point));
1436 CHECK_NATNUM (XCDR (code_point));
69f8de5b 1437 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
3263d5a2
KH
1438 }
1439 else
1440 {
1441 CHECK_NATNUM (code_point);
1442 code = XINT (code_point);
1443 }
1444 charsetp = CHARSET_FROM_ID (id);
1445 c = DECODE_CHAR (charsetp, code);
1446 return (c >= 0 ? make_number (c) : Qnil);
4ed46869
KH
1447}
1448
046b1f03 1449
3263d5a2
KH
1450DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1451 doc: /* Encode the character CH into a code-point of CHARSET.
1452Return nil if CHARSET doesn't include CH.
17e7ef1b 1453
3263d5a2
KH
1454Optional argument RESTRICTION specifies a way to map CHAR to a
1455code-point in CCS. Currently not supported and just ignored. */)
1456 (ch, charset, restriction)
1457 Lisp_Object ch, charset, restriction;
1458{
1459 int c, id;
1460 unsigned code;
1461 struct charset *charsetp;
046b1f03 1462
3263d5a2
KH
1463 CHECK_CHARSET_GET_ID (charset, id);
1464 CHECK_NATNUM (ch);
1465 c = XINT (ch);
1466 charsetp = CHARSET_FROM_ID (id);
1467 code = ENCODE_CHAR (charsetp, ch);
1468 if (code == CHARSET_INVALID_CODE (charsetp))
1469 return Qnil;
1470 if (code > 0x7FFFFFF)
1471 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1472 return make_number (code);
6ae1f27e 1473}
9036eb45 1474
87b089ad 1475
b121a744
KH
1476DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1477 doc:
1478 /* Return a character of CHARSET whose position codes are CODEn.
1479
1480CODE1 through CODE4 are optional, but if you don't supply sufficient
1481position codes, it is assumed that the minimum code in each dimension
1482are specified. */)
1483 (charset, code1, code2, code3, code4)
1484 Lisp_Object charset, code1, code2, code3, code4;
87b089ad 1485{
3263d5a2
KH
1486 int id, dimension;
1487 struct charset *charsetp;
b121a744
KH
1488 unsigned code;
1489 int c;
87b089ad 1490
3263d5a2
KH
1491 CHECK_CHARSET_GET_ID (charset, id);
1492 charsetp = CHARSET_FROM_ID (id);
87b089ad 1493
3263d5a2 1494 if (NILP (code))
b121a744
KH
1495 return make_number (CHARSET_MIN_CHAR (charsetp));
1496
1497 dimension = CHARSET_DIMENSION (charsetp);
1498 if (NILP (code1))
1499 code = charsetp->code_space[(dimension - 1) * 4];
3263d5a2 1500 else
87b089ad 1501 {
b121a744
KH
1502 CHECK_NATNUM (code1);
1503 if (XFASTINT (code1) >= 0x100)
1504 args_out_of_range (make_number (0xFF), code1);
1505 code = XFASTINT (code1);
1506 }
1507 if (dimension > 1)
1508 {
1509 code <<= 8;
1510 if (NILP (code2))
1511 code |= charsetp->code_space[(dimension - 2) * 4];
1512 else
3263d5a2
KH
1513 {
1514 CHECK_NATNUM (code2);
b121a744
KH
1515 if (XFASTINT (code2) >= 0x100)
1516 args_out_of_range (make_number (0xFF), code2);
1517 code |= XFASTINT (code2);
3263d5a2 1518 }
2e344af3 1519
b121a744
KH
1520 if (dimension > 2)
1521 {
1522 code <<= 8;
1523 if (NILP (code3))
1524 code |= charsetp->code_space[(dimension - 3) * 4];
1525 else
1526 {
1527 CHECK_NATNUM (code3);
1528 if (XFASTINT (code3) >= 0x100)
1529 args_out_of_range (make_number (0xFF), code3);
1530 code |= XFASTINT (code3);
1531 }
1532
1533 if (dimension > 3)
1534 {
1535 code <<= 8;
1536 if (NILP (code4))
1537 code |= charsetp->code_space[0];
1538 else
1539 {
1540 CHECK_NATNUM (code4);
1541 if (XFASTINT (code4) >= 0x100)
1542 args_out_of_range (make_number (0xFF), code4);
1543 code |= XFASTINT (code4);
1544 }
1545 }
1546 }
1547 }
3263d5a2 1548
b121a744
KH
1549 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1550 code &= 0x7F7F7F7F;
1551 c = DECODE_CHAR (charsetp, code);
1552 if (c < 0)
1553 error ("Invalid code(s)");
3263d5a2 1554 return make_number (c);
2e344af3
KH
1555}
1556
3263d5a2
KH
1557
1558/* Return the first charset in CHARSET_LIST that contains C.
1559 CHARSET_LIST is a list of charset IDs. If it is nil, use
1560 Vcharset_ordered_list. */
1561
1562struct charset *
1563char_charset (c, charset_list, code_return)
1564 int c;
1565 Lisp_Object charset_list;
1566 unsigned *code_return;
2e344af3 1567{
3263d5a2
KH
1568 if (NILP (charset_list))
1569 charset_list = Vcharset_ordered_list;
2e344af3 1570
3263d5a2 1571 while (CONSP (charset_list))
2e344af3 1572 {
3263d5a2
KH
1573 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1574 unsigned code = ENCODE_CHAR (charset, c);
1575
1576 if (code != CHARSET_INVALID_CODE (charset))
1577 {
1578 if (code_return)
1579 *code_return = code;
1580 return charset;
1581 }
1582 charset_list = XCDR (charset_list);
2e344af3 1583 }
3263d5a2 1584 return NULL;
2e344af3
KH
1585}
1586
2e344af3 1587
3263d5a2
KH
1588DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1589 doc: /*Return list of charset and one or two position-codes of CHAR.
1590If CHAR is invalid as a character code,
1591return a list of symbol `unknown' and CHAR. */)
1592 (ch)
1593 Lisp_Object ch;
2e344af3 1594{
3263d5a2
KH
1595 struct charset *charset;
1596 int c, dimension;
1597 unsigned code;
1598 Lisp_Object val;
1599
1600 CHECK_CHARACTER (ch);
1601 c = XFASTINT (ch);
1602 charset = CHAR_CHARSET (c);
1603 if (! charset)
1604 return Fcons (intern ("unknown"), Fcons (ch, Qnil));
1605
1606 code = ENCODE_CHAR (charset, c);
1607 if (code == CHARSET_INVALID_CODE (charset))
1608 abort ();
1609 dimension = CHARSET_DIMENSION (charset);
1610 val = (dimension == 1 ? Fcons (make_number (code), Qnil)
1611 : dimension == 2 ? Fcons (make_number (code >> 8),
1612 Fcons (make_number (code & 0xFF), Qnil))
1613 : Fcons (make_number (code >> 16),
1614 Fcons (make_number ((code >> 8) & 0xFF),
1615 Fcons (make_number (code & 0xFF), Qnil))));
1616 return Fcons (CHARSET_NAME (charset), val);
2e344af3 1617}
87b089ad 1618
740f080d 1619
3263d5a2
KH
1620DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1621 doc: /* Return the charset of highest priority that contains CHAR. */)
1622 (ch)
1623 Lisp_Object ch;
740f080d 1624{
3263d5a2 1625 struct charset *charset;
740f080d 1626
3263d5a2
KH
1627 CHECK_CHARACTER (ch);
1628 charset = CHAR_CHARSET (XINT (ch));
1629 return (CHARSET_NAME (charset));
740f080d
KH
1630}
1631
2e344af3 1632
3263d5a2
KH
1633DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1634 doc: /*
1635Return charset of a character in the current buffer at position POS.
1636If POS is nil, it defauls to the current point.
1637If POS is out of range, the value is nil. */)
1638 (pos)
1639 Lisp_Object pos;
2e344af3 1640{
3263d5a2
KH
1641 Lisp_Object ch;
1642 struct charset *charset;
1643
1644 ch = Fchar_after (pos);
1645 if (! INTEGERP (ch))
1646 return ch;
1647 charset = CHAR_CHARSET (XINT (ch));
1648 return (CHARSET_NAME (charset));
87b089ad
RS
1649}
1650
2e344af3 1651
3263d5a2
KH
1652DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1653 doc: /*
1654Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1655
1656ISO 2022's designation sequence (escape sequence) distinguishes charsets
1657by their DIMENSION, CHARS, and FINAL-CHAR,
1658where as Emacs distinguishes them by charset symbol.
1659See the documentation of the function `charset-info' for the meanings of
1660DIMENSION, CHARS, and FINAL-CHAR. */)
1661 (dimension, chars, final_char)
1662 Lisp_Object dimension, chars, final_char;
2e344af3 1663{
3263d5a2 1664 int id;
2e344af3 1665
3263d5a2
KH
1666 check_iso_charset_parameter (dimension, chars, final_char);
1667 id = ISO_CHARSET_TABLE (XFASTINT (dimension), XFASTINT (chars),
1668 XFASTINT (final_char));
1669 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2e344af3
KH
1670}
1671
3263d5a2
KH
1672
1673DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1674 0, 0, 0,
1675 doc: /*
1676Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1677 ()
4ed46869 1678{
53316e55 1679 int i;
3263d5a2
KH
1680 struct charset *charset;
1681 Lisp_Object attrs;
4ed46869 1682
3263d5a2 1683 for (i = 0; i < charset_table_used; i++)
4ed46869 1684 {
3263d5a2
KH
1685 charset = CHARSET_FROM_ID (i);
1686 attrs = CHARSET_ATTRIBUTES (charset);
1687
1688 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1689 {
1690 CHARSET_ATTR_DECODER (attrs) = Qnil;
1691 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1692 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1693 }
1694
1695 if (CHARSET_UNIFIED_P (charset))
1696 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
5729c92f
KH
1697 }
1698
3263d5a2 1699 if (CHAR_TABLE_P (Vchar_unified_charset_table))
5729c92f 1700 {
3263d5a2
KH
1701 Foptimize_char_table (Vchar_unified_charset_table);
1702 Vchar_unify_table = Vchar_unified_charset_table;
1703 Vchar_unified_charset_table = Qnil;
4ed46869
KH
1704 }
1705
3263d5a2 1706 return Qnil;
4ed46869
KH
1707}
1708
4ed46869 1709\f
3263d5a2
KH
1710void
1711init_charset ()
4ed46869 1712{
4ed46869 1713
4ed46869
KH
1714}
1715
4ed46869 1716
dfcf069d 1717void
4ed46869
KH
1718init_charset_once ()
1719{
1720 int i, j, k;
1721
3263d5a2
KH
1722 for (i = 0; i < ISO_MAX_DIMENSION; i++)
1723 for (j = 0; j < ISO_MAX_CHARS; j++)
1724 for (k = 0; k < ISO_MAX_FINAL; k++)
1725 iso_charset_table[i][j][k] = -1;
1726
1727 for (i = 0; i < 255; i++)
1728 emacs_mule_charset[i] = NULL;
4ed46869 1729
3263d5a2
KH
1730#if 0
1731 Vchar_charset_set = Fmake_char_table (Qnil, Qnil);
1732 CHAR_TABLE_SET (Vchar_charset_set, make_number (97), Qnil);
1733
1734 DEFSYM (Qcharset_encode_table, "charset-encode-table");
4ed46869
KH
1735
1736 /* Intern this now in case it isn't already done.
1737 Setting this variable twice is harmless.
1738 But don't staticpro it here--that is done in alloc.c. */
1739 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1740
3263d5a2
KH
1741 /* Now we are ready to set up this property, so we can create syntax
1742 tables. */
1743 Fput (Qcharset_encode_table, Qchar_table_extra_slots, make_number (0));
1744#endif
4ed46869
KH
1745}
1746
1747#ifdef emacs
1748
dfcf069d 1749void
4ed46869
KH
1750syms_of_charset ()
1751{
3263d5a2
KH
1752 char *p;
1753
1754 DEFSYM (Qcharsetp, "charsetp");
1755
1756 DEFSYM (Qascii, "ascii");
1757 DEFSYM (Qunicode, "unicode");
1758 DEFSYM (Qeight_bit_control, "eight-bit-control");
1759 DEFSYM (Qeight_bit_graphic, "eight-bit-graphic");
1760 DEFSYM (Qiso_8859_1, "iso-8859-1");
1761
1762 DEFSYM (Qgl, "gl");
1763 DEFSYM (Qgr, "gr");
1764
1765 p = (char *) xmalloc (30000);
1766
1767 staticpro (&Vcharset_ordered_list);
1768 Vcharset_ordered_list = Qnil;
1769
1770 staticpro (&Viso_2022_charset_list);
1771 Viso_2022_charset_list = Qnil;
1772
1773 staticpro (&Vemacs_mule_charset_list);
1774 Vemacs_mule_charset_list = Qnil;
1775
1776 staticpro (&Vcharset_hash_table);
1777 Vcharset_hash_table = Fmakehash (Qeq);
1778
1779 charset_table_size = 128;
1780 charset_table = ((struct charset *)
1781 xmalloc (sizeof (struct charset) * charset_table_size));
1782 charset_table_used = 0;
1783
1784 staticpro (&Vchar_unified_charset_table);
1785 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
1786
1787 defsubr (&Scharsetp);
1788 defsubr (&Smap_charset_chars);
1789 defsubr (&Sdefine_charset_internal);
1790 defsubr (&Sdefine_charset_alias);
1791 defsubr (&Sprimary_charset);
1792 defsubr (&Sset_primary_charset);
1793 defsubr (&Scharset_plist);
1794 defsubr (&Sset_charset_plist);
1795 defsubr (&Sunify_charset);
3fac5a51 1796 defsubr (&Sget_unused_iso_final_char);
4ed46869
KH
1797 defsubr (&Sdeclare_equiv_charset);
1798 defsubr (&Sfind_charset_region);
1799 defsubr (&Sfind_charset_string);
3263d5a2
KH
1800 defsubr (&Sdecode_char);
1801 defsubr (&Sencode_char);
4ed46869 1802 defsubr (&Ssplit_char);
3263d5a2 1803 defsubr (&Smake_char);
4ed46869 1804 defsubr (&Schar_charset);
90d7b74e 1805 defsubr (&Scharset_after);
4ed46869 1806 defsubr (&Siso_charset);
3263d5a2
KH
1807 defsubr (&Sclear_charset_maps);
1808
1809 DEFVAR_LISP ("charset-map-directory", &Vcharset_map_directory,
1810 doc: /* Directory of charset map files that come with GNU Emacs.
1811The default value is \"\\[data-directory]/charsets\". */);
1812 Vcharset_map_directory = Fexpand_file_name (build_string ("charsets"),
1813 Vdata_directory);
4ed46869
KH
1814
1815 DEFVAR_LISP ("charset-list", &Vcharset_list,
fdb82f93 1816 doc: /* List of charsets ever defined. */);
3263d5a2
KH
1817 Vcharset_list = Qnil;
1818
1819 /* Make the prerequisite charset `ascii' and `unicode'. */
1820 {
1821 Lisp_Object args[charset_arg_max];
1822 Lisp_Object plist[14];
1823 Lisp_Object val;
1824
1825 plist[0] = intern (":name");
1826 plist[1] = args[charset_arg_name] = Qascii;
1827 plist[2] = intern (":dimension");
1828 plist[3] = args[charset_arg_dimension] = make_number (1);
1829 val = Fmake_vector (make_number (8), make_number (0));
1830 ASET (val, 1, make_number (127));
1831 plist[4] = intern (":code-space");
1832 plist[5] = args[charset_arg_code_space] = val;
1833 plist[6] = intern (":iso-final-char");
1834 plist[7] = args[charset_arg_iso_final] = make_number ('B');
1835 args[charset_arg_iso_revision] = Qnil;
1836 plist[8] = intern (":emacs-mule-id");
1837 plist[9] = args[charset_arg_emacs_mule_id] = make_number (0);
1838 plist[10] = intern (":ascii-compatible-p");
1839 plist[11] = args[charset_arg_ascii_compatible_p] = Qt;
1840 args[charset_arg_supplementary_p] = Qnil;
1841 args[charset_arg_invalid_code] = Qnil;
1842 plist[12] = intern (":code-offset");
1843 plist[13] = args[charset_arg_code_offset] = make_number (0);
1844 args[charset_arg_map] = Qnil;
1845 args[charset_arg_parents] = Qnil;
1846 args[charset_arg_unify_map] = Qnil;
1847 /* The actual plist is set by mule-conf.el. */
1848 args[charset_arg_plist] = Flist (14, plist);
1849 Fdefine_charset_internal (charset_arg_max, args);
1850 charset_ascii = CHARSET_SYMBOL_ID (Qascii);
1851
1852 plist[1] = args[charset_arg_name] = Qunicode;
1853 plist[3] = args[charset_arg_dimension] = make_number (3);
1854 val = Fmake_vector (make_number (8), make_number (0));
1855 ASET (val, 1, make_number (255));
1856 ASET (val, 3, make_number (255));
1857 ASET (val, 5, make_number (16));
1858 plist[5] = args[charset_arg_code_space] = val;
1859 plist[7] = args[charset_arg_iso_final] = Qnil;
1860 args[charset_arg_iso_revision] = Qnil;
1861 plist[9] = args[charset_arg_emacs_mule_id] = Qnil;
1862 plist[11] = args[charset_arg_ascii_compatible_p] = Qt;
1863 args[charset_arg_supplementary_p] = Qnil;
1864 args[charset_arg_invalid_code] = Qnil;
1865 plist[13] = args[charset_arg_code_offset] = make_number (0);
1866 args[charset_arg_map] = Qnil;
1867 args[charset_arg_parents] = Qnil;
1868 args[charset_arg_unify_map] = Qnil;
1869 /* The actual plist is set by mule-conf.el. */
1870 args[charset_arg_plist] = Flist (14, plist);
1871 Fdefine_charset_internal (charset_arg_max, args);
1872 charset_unicode = CHARSET_SYMBOL_ID (Qunicode);
1873 }
4ed46869
KH
1874}
1875
1876#endif /* emacs */